# ND-Scrapper / Mongo / Noonfood_location.py
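"""Collect Noon Food restaurant URLs for a given latitude/longitude.

Flow: post the coordinates to Noon's public-area-serviceable endpoint, call the
mp-food-api-catalog endpoint with the coordinates it returns, pull banner
linkUrl values out of that catalog response, store the raw response and the URL
list in the Restaurants_in_dubai MongoDB database, and finally pass each URL to
Mongo_location_URLS (from Mongo.Noonfood_Mongo_URL_From_location).
"""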
import requests
import json
from urllib.parse import urljoin
import pymongo
from pymongo import MongoClient
import certifi
from Mongo.Noonfood_Mongo_URL_From_location import Mongo_location_URLS
def mutiple_url_location(lat, lng):
    def get_initial_data(lat, lng):
        # Ask Noon Food whether the given coordinates fall in a serviceable area.
        initial_url = "https://food.noon.com/_svc/customer-v1/customer/public-area-serviceable"
        initial_payload = {"lat": str(lat), "lng": str(lng)}
        initial_headers = {
            "Accept": "application/json, text/plain, */*",
            "Accept-Encoding": "gzip, deflate, br, zstd",
            "Accept-Language": "en-GB,en-US;q=0.9,en;q=0.8,gu;q=0.7",
            "Cache-Control": "no-cache, max-age=0, must-revalidate, no-store",
            "Content-Type": "application/json",
            "Origin": "https://food.noon.com",
            "Referer": "https://food.noon.com/",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
            "X-Content": "desktop",
            "X-Experience": "food",
            "X-Locale": "en-ae",
            "X-Mp": "noon",
            "X-Platform": "web",
            "X-Visitor-Id": "1072f8c2-cfd6-4734-8795-d637de61fba1",
        }
        try:
            response = requests.post(initial_url, json=initial_payload, headers=initial_headers)
            response.raise_for_status()
            json_data = response.json()
            print(f"Initial Response JSON: {json_data}")
            # The service echoes back the resolved coordinates; reuse them for the catalog call.
            new_lat = json_data.get('lat')
            new_lng = json_data.get('lng')
            if new_lat and new_lng:
                return json_data, new_lat, new_lng
            else:
                print("lat or lng not found in the initial response.")
                return None, None, None
        except requests.exceptions.RequestException as e:
            print(f"Request failed: {e}")
            return None, None, None
    def get_urls_from_json(json_data):
        # Pull restaurant links out of the banner carousel in the catalog response.
        url_list = []
        try:
            results = json_data.get("results", [])
            if len(results) > 5:
                # The banner carousel sits in the sixth "results" entry of the catalog page.
                banners = results[5]['modules'][0].get("banners", [])
                for n in range(min(20, len(banners))):
                    try:
                        base_url = 'https://food.noon.com/'
                        want_data = banners[n].get("linkUrl")
                        if want_data:
                            full_url = urljoin(base_url, want_data)
                            url_list.append(full_url)
                    except Exception as e:
                        print(f"Error occurred while extracting URLs: {e}")
                        break
            else:
                print("Insufficient data in 'results'")
        except Exception as e:
            print(f"Error parsing JSON data: {e}")
        return url_list
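    # A hedged alternative to the fixed results[5]['modules'][0] lookup above: scan
    # every module of every result for a "banners" list. This is only a sketch that
    # assumes the results/modules/banners/linkUrl shape used in get_urls_from_json;
    # the helper is illustrative and is not called anywhere in this file.
    def _find_banner_urls(json_data, base_url='https://food.noon.com/'):
        urls = []
        for result in json_data.get("results", []):
            for module in result.get("modules", []):
                for banner in module.get("banners", []):
                    link = banner.get("linkUrl")
                    if link:
                        urls.append(urljoin(base_url, link))
        return urls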
    def store_data_in_mongo(json_data, url_list):
        try:
            # MongoDB connection settings
            client = MongoClient(
                "mongodb+srv://dipenigenerate:[email protected]",
                tlsCAFile=certifi.where()
            )
            db = client['Restaurants_in_dubai']
            collection = db['noonfood_link']
            # Create the document to insert
            document = {
                "initial_response": json_data,
                "url_list": url_list
            }
            # Insert the document into the collection
            result = collection.insert_one(document)
            print(f"Data inserted with id: {result.inserted_id}")
        except Exception as e:
            print(f"Failed to store data in MongoDB: {e}")
    def fetch_urls(lat, lng):
        # Resolve the location first, then query the catalog for that area.
        json_data, new_lat, new_lng = get_initial_data(lat, lng)
        if new_lat and new_lng:
            new_url = "https://food.noon.com/_svc/mp-food-api-catalog/api/"
            new_headers = {
                "method": "GET",
                "Accept": "application/json, text/plain, */*",
                "Accept-Encoding": "gzip, deflate, br, zstd",
                "Accept-Language": "en-GB,en-US;q=0.9,en;q=0.8,gu;q=0.7",
                "Cache-Control": "no-cache, max-age=0, must-revalidate, no-store",
                "Content-Type": "application/json",
                "Origin": "https://food.noon.com",
                "Referer": "https://food.noon.com/",
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
                "X-Content": "desktop",
                "X-Experience": "food",
                "X-Locale": "en-ae",
                "X-Mp": "noon",
                "X-Platform": "web",
                "X-lat": f"{new_lat}",
                "X-lng": f"{new_lng}"
            }
            try:
                new_response = requests.get(new_url, headers=new_headers)
                new_response.raise_for_status()
                new_json_data = new_response.json()
                # Extract URLs from the JSON data
                url_list = get_urls_from_json(new_json_data)
                # Store the initial JSON response and URLs in MongoDB
                store_data_in_mongo(json_data, url_list)
                return url_list
            except requests.exceptions.RequestException as e:
                print(f"Failed to retrieve new content from the URL: {e}")
                return []
        else:
            return []
    # Kick off the scrape for this location and hand each restaurant URL to the
    # per-restaurant scraper.
    urls = fetch_urls(lat, lng)
    if urls:
        print("Fetched URLs:", urls)
        for url in urls:
            url = url + "/"
            Mongo_location_URLS(url, lat, lng)
# if __name__ == "__main__":
#     lat = float(input("Enter the latitude: "))
#     lng = float(input("Enter the longitude: "))
#     # fetch_urls is nested inside mutiple_url_location, so call the wrapper;
#     # it fetches the URLs and passes each one to Mongo_location_URLS itself.
#     mutiple_url_location(lat, lng)