"""Vector search and RAG question answering over chalet listings in MongoDB Atlas.

The module wires a Google Generative AI embedding model and chat model to a
MongoDB Atlas vector store, and exposes ``query_data`` to run a similarity
search and generate an answer from the retrieved documents.
"""

import json

from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
from langchain.schema import Document
from langchain_community.vectorstores import MongoDBAtlasVectorSearch
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from pymongo import MongoClient

import env  # Must provide MONGO_URI and GOOGLE_API_KEY.

google_api_key = env.GOOGLE_API_KEY


def load_data(filepath):
    """Load and return the parsed contents of a UTF-8 encoded JSON file.

    Args:
        filepath: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file.
    """
    with open(filepath, "r", encoding="utf-8") as file:
        return json.load(file)


def _as_dict(value):
    """Return *value* if it is a dict, otherwise an empty dict.

    Guards nested lookups against keys that are present but hold ``None``
    (JSON ``null``), which ``dict.get(key, {})`` alone does not cover.
    """
    return value if isinstance(value, dict) else {}


def _format_extra_description(extra_description):
    """Render ``extraDescription`` items as ``header: item, item`` lines.

    Items missing ``header`` or ``content`` are rendered with empty parts
    instead of raising ``KeyError``.
    """
    lines = []
    for desc in extra_description or []:
        desc = _as_dict(desc)
        header = desc.get("header", "")
        content = desc.get("content") or []
        if isinstance(content, str):
            # A bare string would otherwise be joined character by character.
            content = [content]
        lines.append(f"{header}: {', '.join(str(item) for item in content)}")
    return "\n".join(lines)


def _build_full_text(entry):
    """Build the text that gets embedded for a single JSON entry."""
    chalet = _as_dict(entry.get("chalet"))
    address = _as_dict(chalet.get("address"))
    total_review = _as_dict(chalet.get("totalReview"))

    title = entry.get("title", "")
    chalet_title = entry.get("chalet_title", "")
    description = entry.get("description", "")
    chalet_title_full = chalet.get("title", "")
    address_city = address.get("city", "")
    address_area = address.get("area", "")
    total_review_points = total_review.get("points", "")
    total_review_text = total_review.get("text", "")
    cancel_policy = chalet.get("cancelPolicy", "")
    unit_custom_title = entry.get("unit_custom_title", "")
    checkin_hour = entry.get("checkinHour", "")
    checkout_hour = entry.get("checkoutHour", "")
    final_price = entry.get("final_price", "")
    extra_description_text = _format_extra_description(entry.get("extraDescription"))

    return (
        f"{title}\n"
        f"{chalet_title}\n"
        f"{description}\n"
        f"{chalet_title_full}\n"
        f"{address_city}\n"
        f"{address_area}\n"
        f"التقييم: {total_review_points}\n"
        f"اجمالي التقييم: {total_review_text}\n"
        f"شروط الغاء الحجز: {cancel_policy}\n"
        f"{unit_custom_title}\n"
        f"تسجيل دخول: {checkin_hour}\n"
        f"تسجيل خروج: {checkout_hour}\n"
        f"السعر: {final_price}\n"
        f"{extra_description_text}"
    )


def json_to_documents(data):
    """Convert JSON entries into ``Document`` objects ready for embedding.

    Args:
        data: Iterable of dict entries, as returned by ``load_data``.

    Returns:
        A list with one ``Document`` per entry. ``page_content`` is the
        concatenated text of the entry's key fields and ``metadata`` is the
        original entry itself.
    """
    return [
        Document(page_content=_build_full_text(entry), metadata=entry)
        for entry in data
    ]


# MongoDB setup
client = MongoClient(env.MONGO_URI)
db = "riyadhMap"
collectionName = "mapData"
collection = client[db][collectionName]

embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001", google_api_key=google_api_key
)

# NOTE: the following code was used once to load data from the JSON file and
# populate the vector store.
# data = load_data("data.json")
# documents = json_to_documents(data)
# vectorStore = MongoDBAtlasVectorSearch.from_documents(documents, embeddings, collection=collection)

vectorStore = MongoDBAtlasVectorSearch(collection, embeddings)

# Chat model used by the question-answering chain.
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash", google_api_key=google_api_key)

prompt_template = """
As a friendly tourism agent, suggest the best possible options based on the client's input.
Your answer should be based on the text input language but mostly in Arabic or English.
If there is no exact match, provide the top three closest possible information.
Each context will provide `title` (e.g. كود الوحدة (xxxxx)), therefore, always include `title` in your answer for better user experience from the `context`.
Be convincing and friendly in your response and use Saudi accent if the text in Arabic.\n\n
Context:\n{context}\n
Question:\n{question}\n
Answer:
"""

prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])

# Build a "stuff" QA chain that feeds the retrieved documents into the custom prompt.
retriever = vectorStore.as_retriever()
qa = load_qa_chain(llm, chain_type="stuff", prompt=prompt)


def _property_details(metadata):
    """Flatten one document's metadata into the property summary dict."""
    metadata = _as_dict(metadata)
    chalet = _as_dict(metadata.get("chalet"))
    address = _as_dict(chalet.get("address"))
    total_review = _as_dict(chalet.get("totalReview"))
    return {
        "title": metadata.get("title", ""),
        "chalet_title": metadata.get("chalet_title", ""),
        "final_price": metadata.get("final_price", ""),
        # "address" carries the city; the area is reported separately.
        "address": address.get("city", ""),
        "area": address.get("area", ""),
        "total_review_points": total_review.get("points", ""),
        "total_review_text": total_review.get("text", ""),
        "cancel_policy": chalet.get("cancelPolicy", ""),
        "unit_custom_title": metadata.get("unit_custom_title", ""),
        "checkin_hour": metadata.get("checkinHour", ""),
        "checkout_hour": metadata.get("checkoutHour", ""),
        "extra_description": metadata.get("extraDescription", []),
        "lat": chalet.get("lat", ""),
        "lng": chalet.get("lng", ""),
    }


def query_data(query, k=10):
    """Run a vector similarity search and generate an answer from the results.

    Args:
        query: The user's question, used both for the similarity search and
            as the question passed to the QA chain.
        k: Number of documents to retrieve from the vector store.

    Returns:
        A ``(properties, retriever_output)`` tuple: ``properties`` is a list of
        summary dicts, one per retrieved document, and ``retriever_output`` is
        the QA chain's result for those documents.
    """
    docs = vectorStore.similarity_search(query, k=k)
    properties = [_property_details(doc.metadata) for doc in docs]

    # The chain builds its own context from the documents it is given.
    retriever_output = qa.run(input_documents=docs, question=query)

    return properties, retriever_output


# Example of querying the data (the sample query asks, in Arabic, for a
# reasonably priced two-bedroom apartment near the Boulevard with self check-in):
# query = "شقة قريبة من البوليفارد بسعر مناسب ريال غرفتين نوم مع تسجيل دخول ذاتي"
# as_output, retriever_output = query_data(query)
# print("Atlas Vector Search Output: ", as_output)
# print("RAG QA Output: ", retriever_output)