"""Tourism recommendation chatbot.

Pipeline:
    1. Load ``tourism_place.csv`` into an in-memory SQLite table.
    2. Embed every place with a SentenceTransformer model and store the vector.
    3. For each user query: normalize it with a Groq-hosted LLM, embed it,
       rank places by cosine similarity, and let the LLM phrase the top
       results in Bahasa Indonesia.
    4. Serve the whole thing through a Gradio text interface.
"""

import json
import os
import sqlite3

import gradio as gr
import numpy as np
import pandas as pd
from groq import Groq
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from tqdm import tqdm

# Name of the sentence-embedding model shared by indexing and querying.
EMBEDDING_MODEL_NAME = 'paraphrase-MiniLM-L6-v2'
# Groq chat model; overridable through the environment, same default as before.
CHAT_MODEL_NAME = os.environ.get("GROQ_MODEL", "llama-3.1-70b-versatile")
# Number of recommendations handed to the LLM.
TOP_K = 5

# Get the Groq API key from environment variables
# (in Hugging Face, this is stored as a secret).
client = Groq(
    api_key=os.environ.get("GROQ_API_KEY"),
)

# uri=True is required for the "file:" URI to be interpreted as a shared
# in-memory database; without it SQLite creates an on-disk file with that
# literal name. check_same_thread=False is required because Gradio invokes
# the handler from worker threads, not from the thread that opened `con`.
con = sqlite3.connect(
    "file::memory:?cache=shared",
    uri=True,
    check_same_thread=False,
)
con.row_factory = sqlite3.Row
cur = con.cursor()

# Create the table if it does not exist yet.
cur.execute("""
    CREATE TABLE IF NOT EXISTS places (
        Place_Id INTEGER PRIMARY KEY,  -- auto-assigned by SQLite
        Place_Name TEXT NOT NULL,
        Description TEXT,
        Category TEXT,
        City TEXT,
        Price REAL,
        Rating REAL,
        Embedding TEXT                 -- comma-separated floats
    );
""")

data = pd.read_csv('tourism_place.csv')

# Seed the table only when it is empty.
cur.execute("SELECT 1 FROM places LIMIT 1")
if cur.fetchone() is None:
    seed_rows = (
        (
            row.Place_Name,
            row.Description,
            row.Category,
            row.City,
            float(row.Price),
            float(row.Rating),
        )
        for row in tqdm(data.itertuples(index=False), total=len(data))
    )
    cur.executemany(
        """
        INSERT INTO places (Place_Name, Description, Category, City, Price, Rating)
        VALUES (?, ?, ?, ?, ?, ?)
        """,
        seed_rows,
    )
    con.commit()

# Load the embedding model once; it is used both for indexing and for queries.
model = SentenceTransformer(EMBEDDING_MODEL_NAME)
# Vector size produced by `model` (384 for paraphrase-MiniLM-L6-v2).
EMBEDDING_DIM = model.get_sentence_embedding_dimension() or 384


def compute_and_store_embeddings():
    """Embed every row of ``places`` and store the vector in ``Embedding``.

    The text embedded for a place is its name, category, description and city
    joined by single spaces. Vectors are stored as comma-separated floats.
    """
    places = con.execute(
        "SELECT Place_Id, Place_Name, Category, Description, City FROM places"
    ).fetchall()
    if not places:
        return

    texts = [
        f"{place['Place_Name']} {place['Category']} "
        f"{place['Description']} {place['City']}"
        for place in places
    ]
    # One batched call instead of one forward pass per place.
    embeddings = model.encode(texts)

    con.executemany(
        "UPDATE places SET Embedding = ? WHERE Place_Id = ?",
        (
            (','.join(str(x) for x in embedding), place['Place_Id'])
            for place, embedding in zip(places, embeddings)
        ),
    )
    con.commit()


# Run the function to compute and store embeddings.
compute_and_store_embeddings()


def normalize_query(user_query):
    """Ask the LLM to rewrite *user_query* as "Place name, Category, Description, City".

    Returns the last non-empty line of the model's reply. If the request fails
    or the reply is empty, the original *user_query* is returned so downstream
    embedding still reflects what the user asked for.
    """
    prompt = (
        f'Please analyze the query: "{user_query}", extract Place name, '
        'Category, Description, and City.\n'
        'Return the response as: "Place name, Category, Description, City".'
    )
    try:
        response = client.chat.completions.create(
            model=CHAT_MODEL_NAME,
            messages=[{"role": "user", "content": prompt}],
        )
        content = response.choices[0].message.content or ""
    except Exception as e:
        print(f"Error normalizing query: {e}")
        return user_query

    # Strip first so a trailing newline does not yield an empty "last line".
    normalized_user_query = content.strip().split('\n')[-1].strip()
    return normalized_user_query or user_query


def get_user_embedding(query):
    """Return the embedding of *query*, or a zero vector if encoding fails.

    The zero vector has the same dimension as real embeddings so it can still
    be passed to ``cosine_similarity`` (every similarity is then 0).
    """
    try:
        return model.encode(query)
    except Exception as e:
        print(f"Error generating embedding: {e}")
        return np.zeros(EMBEDDING_DIM)


def get_similar_places(user_embedding):
    """Rank all places against *user_embedding*.

    Returns a list of ``(place_row, similarity)`` tuples sorted by similarity
    and then rating, both descending. Places without a stored embedding are
    skipped. Returns an empty list when there is nothing to rank.
    """
    # Fresh cursor per call: this function runs on Gradio worker threads.
    rows = con.execute(
        "SELECT * FROM places WHERE Embedding IS NOT NULL AND Embedding != ''"
    ).fetchall()
    if not rows:
        return []

    # Embeddings are stored as comma-separated strings; rebuild the matrix.
    matrix = np.array(
        [np.array(row['Embedding'].split(','), dtype=float) for row in rows]
    )
    scores = cosine_similarity([user_embedding], matrix)[0]

    similarities = [(row, float(score)) for row, score in zip(rows, scores)]
    # A NULL rating is treated as 0.0 so tied similarities remain comparable.
    return sorted(
        similarities,
        key=lambda pair: (pair[1], pair[0]['Rating'] or 0.0),
        reverse=True,
    )


def get_top_5_destinations(user_query):
    """Return the five best-matching places for *user_query*.

    Returns a list of dicts with keys ``name``, ``city``, ``category``,
    ``rating``, ``description`` and ``similarity``, or an Indonesian
    "nothing found" message (a ``str``) when no place could be ranked.
    """
    normalized_query = normalize_query(user_query)
    user_embedding = get_user_embedding(normalized_query)
    similar_places = get_similar_places(user_embedding)

    if not similar_places:
        return "Tidak ada tempat yang ditemukan."

    return [
        {
            'name': place['Place_Name'],
            'city': place['City'],
            'category': place['Category'],
            'rating': place['Rating'],
            'description': place['Description'],
            'similarity': similarity,
        }
        for place, similarity in similar_places[:TOP_K]
    ]


def generate_response(user_query, top_places):
    """Have the LLM present *top_places* to the user in Bahasa Indonesia.

    *user_query* is accepted for interface compatibility; the prompt is built
    only from *top_places*. Returns the model's reply, or an Indonesian apology
    message if the request fails.
    """
    try:
        # json.dumps escapes quotes/newlines in descriptions correctly;
        # ensure_ascii=False keeps non-ASCII place names readable.
        destinations_data = json.dumps(
            [
                {
                    "name": place["name"],
                    "city": place["city"],
                    "category": place["category"],
                    "rating": place["rating"],
                    "description": place["description"],
                }
                for place in top_places
            ],
            ensure_ascii=False,
        )

        system_prompt = """
        You are a tour guide assistant. Your task is to present the following tourism recommendations to the user in Bahasa Indonesia.
        - For each destination, include the name, city, category, rating, and a short description.
        - Do not provide any additional commentary.
        - Only return the provided data in a clear and concise format.
        """

        response = client.chat.completions.create(
            model=CHAT_MODEL_NAME,
            messages=[
                {"role": "system", "content": system_prompt},
                {
                    "role": "user",
                    "content": f"Berikut adalah rekomendasi berdasarkan data: {destinations_data}",
                },
            ],
        )
        return response.choices[0].message.content
    except Exception as e:
        print(f"Error generating response: {e}")
        return "Maaf, terjadi kesalahan dalam menghasilkan rekomendasi."


def chatbot(user_query):
    """Gradio handler: turn a free-text tourism question into recommendations."""
    # Step 1: Get the top 5 destinations.
    top_places = get_top_5_destinations(user_query)

    # A string result is a user-facing message (e.g. no places found).
    if isinstance(top_places, str):
        return top_places

    # Step 2: Generate the chatbot's response.
    return generate_response(user_query, top_places)


# Define Gradio Interface
iface = gr.Interface(
    fn=chatbot,
    inputs="text",
    outputs="text",
    title="Tourism Recommendation Chatbot",
    description="Masukkan pertanyaan wisata Anda dan dapatkan rekomendasi tempat terbaik!"
)

# Launch the Gradio App
if __name__ == "__main__":
    iface.launch(share=True)