"""Tourism recommendation chatbot.

Loads tourism places from a CSV file into SQLite, stores a sentence embedding
for every place, and serves a Gradio UI that ranks places by cosine similarity
to the user's query and lets a Groq-hosted LLM phrase the recommendation.
"""
import json
import os
import sqlite3

import gradio as gr
import numpy as np
import pandas as pd
from groq import Groq
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from tqdm import tqdm

# Define the SentenceTransformer model globally so it is loaded only once.
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

# Size of the vectors produced by the model above; used for the zero-vector
# fallback so it always matches the stored place embeddings.
EMBEDDING_DIM = 384

# Get the Groq API key from the environment. A key must never be committed to
# source control; any key that was previously hard-coded here should be
# revoked and replaced.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))


def get_user_embedding(query):
    """Return the embedding of *query* as produced by the global model.

    Args:
        query: Text to embed.

    Returns:
        The model's embedding, or a zero vector of length ``EMBEDDING_DIM``
        if encoding fails, so that similarity computation downstream still
        receives a vector of the right shape.
    """
    try:
        return model.encode(query)
    except Exception as e:
        print(f"Error generating embedding: {e}")
        return np.zeros(EMBEDDING_DIM)


# ``uri=True`` is required for the "file:" URI to be interpreted; without it
# the string is treated as a plain on-disk file name.
con = sqlite3.connect(
    "file::memory:?cache=shared",
    uri=True,
    check_same_thread=False,
)
con.row_factory = sqlite3.Row
cur = con.cursor()

# Create the table if it does not exist yet.
cur.execute("""
    CREATE TABLE IF NOT EXISTS places (
        Place_Id INTEGER PRIMARY KEY,
        Place_Name TEXT NOT NULL,
        Description TEXT,
        Category TEXT,
        City TEXT,
        Price REAL,
        Rating REAL,
        Embedding TEXT
    );
""")

data = pd.read_csv('dataset/tourism_place.csv')

# Seed the table only when it is empty (one-row probe instead of SELECT *).
if cur.execute("SELECT 1 FROM places LIMIT 1").fetchone() is None:
    place_rows = [
        (name, description, category, city, float(price), float(rating))
        for name, description, category, city, price, rating in zip(
            data['Place_Name'],
            data['Description'],
            data['Category'],
            data['City'],
            data['Price'],
            data['Rating'],
        )
    ]
    cur.executemany(
        """
        INSERT INTO places (Place_Name, Description, Category, City, Price, Rating)
        VALUES (?, ?, ?, ?, ?, ?)
        """,
        tqdm(place_rows),
    )
    con.commit()


def compute_and_store_embeddings():
    """Embed every place and store the vector in its ``Embedding`` column.

    The text embedded for a place is its name, category, description and city
    joined by spaces. Each vector is stored as comma-separated text.
    """
    places = cur.execute(
        "SELECT Place_Id, Place_Name, Category, Description, City FROM places"
    ).fetchall()
    if not places:
        return

    texts = [
        f"{place[1]} {place[2]} {place[3]} {place[4]}" for place in places
    ]
    # Encode all texts in one call instead of one model call per place.
    embeddings = model.encode(texts)

    cur.executemany(
        "UPDATE places SET Embedding = ? WHERE Place_Id = ?",
        (
            (','.join(map(str, embedding)), place[0])
            for embedding, place in zip(embeddings, places)
        ),
    )
    con.commit()


compute_and_store_embeddings()


def normalize_query(user_query):
    """Ask the LLM to extract place attributes from *user_query*.

    Args:
        user_query: Raw text typed by the user.

    Returns:
        The last line of the LLM answer followed by the original query
        (separated by a space). If the LLM call fails, the original query is
        returned unchanged so the search can still run on it.
    """
    try:
        response = client.chat.completions.create(
            model="llama-3.1-8b-instant",
            messages=[{
                "role": "user",
                "content": (
                    f'Please analyze the query: "{user_query}", extract '
                    "Place name, Category, Description, and City. "
                    'Return the response as: '
                    '"Place name, Category, Description, City".'
                ),
            }],
        )
        answer = response.choices[0].message.content
        normalized_user_query = answer.split('\n')[-1].strip()
        # Keep the raw query too; the separator stops the last extracted
        # word and the first query word from fusing into one token.
        return f"{normalized_user_query} {user_query}"
    except Exception as e:
        print(f"Error normalizing query: {e}")
        return str(user_query)


def get_similar_places(user_embedding):
    """Rank all stored places by similarity to *user_embedding*.

    Args:
        user_embedding: 1-D vector with the same length as the stored
            place embeddings.

    Returns:
        A list of ``(row, similarity)`` tuples sorted by similarity and then
        rating, both descending. Places without a stored embedding are
        skipped; the list is empty when nothing can be ranked.
    """
    # con.execute creates a fresh cursor, so request handlers do not share
    # the module-level cursor's state.
    rows = [
        row
        for row in con.execute("SELECT * FROM places")
        if row['Embedding']
    ]
    if not rows:
        return []

    place_matrix = np.array([
        [float(x) for x in row['Embedding'].split(',')] for row in rows
    ])
    # One vectorised call for all places instead of one call per row.
    scores = cosine_similarity([user_embedding], place_matrix)[0]

    def sort_key(item):
        row, score = item
        rating = row['Rating']
        # A NULL rating must not be compared against floats on score ties.
        return (score, rating if rating is not None else 0.0)

    scored = [(row, float(score)) for row, score in zip(rows, scores)]
    return sorted(scored, key=sort_key, reverse=True)


def get_top_10_destinations(user_query):
    """Return the ten best-matching places for *user_query*.

    Returns:
        A list of up to ten ``(row, similarity)`` tuples, or a user-facing
        message string when no place could be ranked.
    """
    normalized_query = normalize_query(user_query)
    user_embedding = get_user_embedding(normalized_query)
    similar_places = get_similar_places(user_embedding)
    if not similar_places:
        return "Tidak ada tempat yang ditemukan."
    return similar_places[:10]


def generate_response(user_query, top_places, temperature):
    """Let the LLM phrase a recommendation from the ranked places.

    Args:
        user_query: Raw text typed by the user (quoted in the system prompt).
        top_places: Iterable of ``(row, similarity)`` tuples.
        temperature: Sampling temperature forwarded to the LLM.

    Returns:
        The LLM's answer, or an apology message if the call fails.
    """
    try:
        # json.dumps escapes quotes and newlines inside names/descriptions,
        # which hand-built JSON strings would not.
        destinations_data = ", ".join(
            json.dumps(
                {
                    "name": place[0]["Place_Name"],
                    "city": place[0]["City"],
                    "category": place[0]["Category"],
                    "rating": place[0]["Rating"],
                    "description": place[0]["Description"],
                },
                ensure_ascii=False,
            )
            for place in top_places
        )
        system_prompt = (
            "You are a tour guide assistant. Present the tourism "
            "recommendations to the user in Bahasa Indonesia. "
            "Only return maximum 5 places that suitable what user wants and "
            "provided the data in a clear and concise format. "
            f'Only return the city that mentioned in "{user_query}".'
        )
        response = client.chat.completions.create(
            model="llama-3.1-8b-instant",
            messages=[
                {"role": "system", "content": system_prompt},
                {
                    "role": "user",
                    "content": (
                        "Berikut adalah rekomendasi berdasarkan data: "
                        f"{destinations_data}"
                    ),
                },
            ],
            temperature=temperature,
        )
        return response.choices[0].message.content
    except Exception as e:
        print(f"Error generating response: {e}")
        return "Maaf, terjadi kesalahan dalam menghasilkan rekomendasi."


def chatbot(user_query, temperature):
    """Gradio callback: answer *user_query* with place recommendations.

    Args:
        user_query: Raw text typed by the user.
        temperature: Sampling temperature chosen on the slider.

    Returns:
        The text shown to the user.
    """
    top_places = get_top_10_destinations(user_query)
    # A string means "nothing found"; show it as-is.
    if isinstance(top_places, str):
        return top_places
    return generate_response(user_query, top_places[:5], temperature)


# Define Gradio Interface
iface = gr.Interface(
    fn=chatbot,
    inputs=[
        "text",
        gr.Slider(
            minimum=0,
            maximum=1,
            step=0.1,
            value=0.8,
            label="Temperature",
        ),
    ],
    outputs="text",
    title="Tourism Recommendation Chatbot",
    description="Masukkan pertanyaan wisata Anda dan dapatkan rekomendasi tempat terbaik!",
)

# Launch the Gradio App
if __name__ == "__main__":
    iface.launch(share=True)