_abdullahid_ committed on
Commit
a2a4a1d
·
1 Parent(s): 713a08d
Files changed (3) hide show
  1. app.py +76 -0
  2. map_search.py +134 -0
  3. requirements.txt +100 -0
app.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from map_search import query_data
2
+ import gradio as gr
3
+ from gradio.themes.base import Base
4
+ import pandas as pd
5
+ import plotly.graph_objects as go
6
+
7
def query_data_with_map(question):
    """Run the search for *question* and plot the matching units on a map.

    Args:
        question: Free-text query typed by the user. ``None``, an empty
            string or a whitespace-only string yields an empty map and an
            empty answer without calling the search backend.

    Returns:
        A ``(fig, text_from_gemini)`` tuple: the Plotly figure holding one
        marker per matching unit, and the answer text returned by
        ``query_data``.
    """
    if question and question.strip():
        # query_data returns (list of per-unit dicts, generated answer text).
        properties, text_from_gemini = query_data(question)

        # Keep only the columns the map needs.
        df = pd.DataFrame(
            properties,
            columns=['title', 'chalet_title', 'final_price', 'unit_custom_title', 'lat', 'lng'],
        )

        # query_data falls back to "" when a unit has no coordinates; such
        # rows cannot be placed on the map, so coerce and drop them.
        df['lat'] = pd.to_numeric(df['lat'], errors='coerce')
        df['lng'] = pd.to_numeric(df['lng'], errors='coerce')
        df = df.dropna(subset=['lat', 'lng'])

        # Per-marker values, in the order the hover template indexes them.
        text_list = df[['title', 'unit_custom_title', 'chalet_title', 'final_price']].values.tolist()
        fig = go.Figure(go.Scattermapbox(
            customdata=text_list,
            lat=df['lat'].tolist(),
            lon=df['lng'].tolist(),
            mode='markers',
            marker=go.scattermapbox.Marker(
                size=6
            ),
            hoverinfo="text",
            hovertemplate=(
                '<b>Title</b>: %{customdata[0]}<br>'
                '<b>Unit Custom Title</b>: %{customdata[1]}<br>'
                '<b>Chalet Title</b>: %{customdata[2]}<br>'
                '<b>Final Price</b>: SAR %{customdata[3]}'
            )
        ))
    else:
        # Create an empty map
        fig = go.Figure(go.Scattermapbox(
            lat=[],
            lon=[],
            mode='markers',
            marker=go.scattermapbox.Marker(
                size=20
            )
        ))
        text_from_gemini = ""

    fig.update_layout(
        mapbox_style="open-street-map",
        hovermode='closest',
        mapbox=dict(
            bearing=0,
            center=go.layout.mapbox.Center(
                lat=24.7136,  # Latitude for Riyadh
                lon=46.6753  # Longitude for Riyadh
            ),
            pitch=0,
            zoom=10
        )
    )
    return fig, text_from_gemini
58
+
59
+ with gr.Blocks(theme=Base(), title="Riyadh Entertainment Map powered by Smart Search System using Vector Search + RAG") as demo:
60
+ gr.Markdown(
61
+ """
62
+ # Smart Search System using Atlas Vector Search + RAG Architecture
63
+ """)
64
+ textbox = gr.Textbox(label="Enter your query here", lines=1)
65
+ with gr.Row():
66
+ button = gr.Button("Search", variant="primary")
67
+ with gr.Column():
68
+ output1 = gr.Plot(label="Map Output")
69
+ output2 = gr.Textbox(lines=1, max_lines=10, label="Output generated by chaining Atlas Vector Search to Langchain's `load_qa_chain` + Gemini flash 1.5 LLM:")
70
+
71
+ # Load the empty map when the app starts
72
+ demo.load(query_data_with_map, inputs=[textbox], outputs=[output1, output2])
73
+ # Call query_data_with_map when the Search button is clicked
74
+ button.click(query_data_with_map, textbox, outputs=[output1, output2])
75
+
76
+ demo.launch(share=True)
map_search.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from pymongo import MongoClient
3
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
4
+ from langchain_community.vectorstores import MongoDBAtlasVectorSearch
5
+ from langchain.chains.question_answering import load_qa_chain
6
+ from langchain.schema import Document
7
+ from langchain.prompts import PromptTemplate
8
+ import env # Ensure key_param contains MongoDB URI and Google API key
9
+
10
+ google_api_key = env.GOOGLE_API_KEY
11
+
12
+ # Load data from JSON file
13
def load_data(filepath):
    """Parse the UTF-8 encoded JSON file at *filepath* and return its content."""
    with open(filepath, mode='r', encoding='utf-8') as source:
        return json.load(source)
17
+
18
+ # Convert JSON entries to Document format for embeddings
19
def _entry_to_text(entry):
    """Build the newline-separated text that gets embedded for one entry."""
    # `or {}` / `or []` also covers keys that are present but null, where a
    # plain .get(key, default) would hand back None and break the chained
    # lookups below.
    chalet = entry.get("chalet") or {}
    address = chalet.get("address") or {}
    total_review = chalet.get("totalReview") or {}

    extra_lines = []
    for desc in entry.get("extraDescription") or []:
        content = desc.get("content") or []
        if isinstance(content, str):
            # A bare string would otherwise be joined character by character.
            content = [content]
        joined = ', '.join(str(item) for item in content)
        extra_lines.append(f"{desc.get('header', '')}: {joined}")
    extra_description_text = "\n".join(extra_lines)

    return (
        f"{entry.get('title', '')}\n"
        f"{entry.get('chalet_title', '')}\n"
        f"{entry.get('description', '')}\n"
        f"{chalet.get('title', '')}\n"
        f"{address.get('city', '')}\n"
        f"{address.get('area', '')}\n"
        f"التقييم: {total_review.get('points', '')}\n"
        f"اجمالي التقييم: {total_review.get('text', '')}\n"
        f"شروط الغاء الحجز: {chalet.get('cancelPolicy', '')}\n"
        f"{entry.get('unit_custom_title', '')}\n"
        f"تسجيل دخول: {entry.get('checkinHour', '')}\n"
        f"تسجيل خروج: {entry.get('checkoutHour', '')}\n"
        f"السعر: {entry.get('final_price', '')}\n"
        f"{extra_description_text}"
    )


def json_to_documents(data):
    """Convert JSON entries to ``Document`` objects for embedding.

    Args:
        data: Iterable of entry dicts as returned by ``load_data``.

    Returns:
        A list with one ``Document`` per entry. ``page_content`` is the text
        assembled from the entry's key fields and ``metadata`` is the entry
        itself, unchanged.
    """
    return [
        Document(page_content=_entry_to_text(entry), metadata=entry)
        for entry in data
    ]
63
+
64
+
65
+ # MongoDB setup
66
+ client = MongoClient(env.MONGO_URI)
67
+ db = "riyadhMap"
68
+ collectionName = "mapData"
69
+ collection = client[db][collectionName]
70
+
71
+ embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=google_api_key)
72
+
73
+ # NOTE: the following code was used once to load the data from the JSON file and create the vector store.
74
+ # data = load_data("data.json")
75
+ # documents = json_to_documents(data)
76
+ # vectorStore = MongoDBAtlasVectorSearch.from_documents(documents, embeddings, collection=collection)
77
+
78
+
79
+ vectorStore = MongoDBAtlasVectorSearch(collection, embeddings)
80
+
81
+ # Language model used by the question-answering chain
82
+ llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash", google_api_key=google_api_key)
83
+
84
+ prompt_template = """
85
+ As a friendly tourism agent, suggest the best possible options based on the client's input. Your answer should be based on the text input language but mostly in Arabic or English. If there is no exact match, provide the top three closest possible information. Each context will provide `title` (e.g. كود الوحدة (xxxxx)), therefore, always include `title` in your answer for better user experience from the `context`. Be convincing and friendly in your response and use Saudi accent if the text in Arabic.\n\n
86
+ Context:\n{context}\n
87
+ Question:\n{question}\n
88
+ Answer:
89
+ """
90
+
91
+ prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
92
+
93
+ # Build a "stuff" question-answering chain that uses the custom prompt to guide the model
94
+ retriever = vectorStore.as_retriever()
95
+ qa = load_qa_chain(llm, chain_type="stuff", prompt=prompt)
96
+
97
+ # Define a query function for vector-based similarity search and RAG
98
def _property_details(metadata):
    """Flatten one document's metadata into the dict returned to callers."""
    # `or {}` also covers keys stored as null, where .get(key, {}) would
    # return None and break the nested lookups.
    chalet = metadata.get("chalet") or {}
    address = chalet.get("address") or {}
    total_review = chalet.get("totalReview") or {}
    return {
        "title": metadata.get("title", ""),
        "chalet_title": metadata.get("chalet_title", ""),
        "final_price": metadata.get("final_price", ""),
        "address": address.get("city", ""),
        "area": address.get("area", ""),
        "total_review_points": total_review.get("points", ""),
        "total_review_text": total_review.get("text", ""),
        "cancel_policy": chalet.get("cancelPolicy", ""),
        "unit_custom_title": metadata.get("unit_custom_title", ""),
        "checkin_hour": metadata.get("checkinHour", ""),
        "checkout_hour": metadata.get("checkoutHour", ""),
        "extra_description": metadata.get("extraDescription", []),
        "lat": chalet.get("lat", ""),
        "lng": chalet.get("lng", ""),
    }


def query_data(query):
    """Run a vector similarity search for *query* and answer it with the QA chain.

    Args:
        query: The user's free-text question.

    Returns:
        A ``(properties, retriever_output)`` tuple: a list with one details
        dict per retrieved document, and the text produced by the
        question-answering chain from those same documents.
    """
    # Retrieve the ten closest documents from the vector store.
    docs = vectorStore.similarity_search(query, k=10)

    properties = [_property_details(doc.metadata) for doc in docs]

    # The chain stuffs the retrieved documents into the prompt itself, so no
    # separately joined context string is needed here.
    retriever_output = qa.run(input_documents=docs, question=query)
    return properties, retriever_output
128
+
129
+ # Example of querying the data
130
+ # query = "شقة قريبة من البوليفارد بسعر مناسب ريال غرفتين نوم مع تسجيل دخول ذاتي"
131
+ # as_output, retriever_output = query_data(query)
132
+
133
+ # print("Atlas Vector Search Output: ", as_output)
134
+ # print("RAG QA Output: ", retriever_output)
requirements.txt ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiofiles==23.2.1
2
+ aiohappyeyeballs==2.4.3
3
+ aiohttp==3.10.10
4
+ aiosignal==1.3.1
5
+ annotated-types==0.7.0
6
+ anyio==4.6.2.post1
7
+ attrs==24.2.0
8
+ cachetools==5.5.0
9
+ certifi==2024.8.30
10
+ charset-normalizer==3.4.0
11
+ click==8.1.7
12
+ colorama==0.4.6
13
+ dataclasses-json==0.6.7
14
+ dnspython==2.7.0
15
+ fastapi==0.115.4
16
+ ffmpy==0.4.0
17
+ filelock==3.16.1
18
+ frozenlist==1.5.0
19
+ fsspec==2024.10.0
20
+ google-ai-generativelanguage==0.6.10
21
+ google-api-core==2.23.0
22
+ google-api-python-client==2.151.0
23
+ google-auth==2.36.0
24
+ google-auth-httplib2==0.2.0
25
+ google-generativeai==0.8.3
26
+ googleapis-common-protos==1.65.0
27
+ gradio==5.5.0
28
+ gradio-client==1.4.2
29
+ greenlet==3.1.1
30
+ grpcio==1.67.1
31
+ grpcio-status==1.67.1
32
+ h11==0.14.0
33
+ httpcore==1.0.6
34
+ httplib2==0.22.0
35
+ httpx==0.27.2
36
+ httpx-sse==0.4.0
37
+ huggingface-hub==0.26.2
38
+ idna==3.10
39
+ jinja2==3.1.4
40
+ jsonpatch==1.33
41
+ jsonpointer==3.0.0
42
+ langchain==0.3.7
43
+ langchain-community==0.3.5
44
+ langchain-core==0.3.15
45
+ langchain-google-genai==2.0.4
46
+ langchain-text-splitters==0.3.2
47
+ langsmith==0.1.142
48
+ markdown-it-py==3.0.0
49
+ markupsafe==2.1.5
50
+ marshmallow==3.23.1
51
+ mdurl==0.1.2
52
+ multidict==6.1.0
53
+ mypy-extensions==1.0.0
54
+ numpy==1.26.4
55
+ orjson==3.10.11
56
+ packaging==24.2
57
+ pandas==2.2.3
58
+ pillow==11.0.0
59
+ plotly==5.24.1
60
+ propcache==0.2.0
61
+ proto-plus==1.25.0
62
+ protobuf==5.28.3
63
+ pyasn1==0.6.1
64
+ pyasn1-modules==0.4.1
65
+ pydantic==2.9.2
66
+ pydantic-core==2.23.4
67
+ pydantic-settings==2.6.1
68
+ pydub==0.25.1
69
+ pygments==2.18.0
70
+ pymongo==4.10.1
71
+ pyparsing==3.2.0
72
+ python-dateutil==2.9.0.post0
73
+ python-dotenv==1.0.1
74
+ python-multipart==0.0.12
75
+ pytz==2024.2
76
+ pyyaml==6.0.2
77
+ requests==2.32.3
78
+ requests-toolbelt==1.0.0
79
+ rich==13.9.4
80
+ rsa==4.9
81
+ ruff==0.7.3
82
+ safehttpx==0.1.1
83
+ semantic-version==2.10.0
84
+ shellingham==1.5.4
85
+ six==1.16.0
86
+ sniffio==1.3.1
87
+ sqlalchemy==2.0.35
88
+ starlette==0.41.2
89
+ tenacity==9.0.0
90
+ tomlkit==0.12.0
91
+ tqdm==4.67.0
92
+ typer==0.13.0
93
+ typing-extensions==4.12.2
94
+ typing-inspect==0.9.0
95
+ tzdata==2024.2
96
+ uritemplate==4.1.1
97
+ urllib3==2.2.3
98
+ uvicorn==0.32.0
99
+ websockets==12.0
100
+ yarl==1.17.1