import streamlit as st
from openai import OpenAI
import os
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import torch

# Set up the OpenAI client
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Check if a GPU is available
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# Load metadata and embeddings (ensure these files are in your working directory or update the paths)
metadata_path = '/kaggle/working/leetcode_metadata.csv'  # Update this path if needed
embeddings_path = '/kaggle/working/leetcode_embeddings2.npy'  # Update this path if needed

metadata = pd.read_csv(metadata_path)
embeddings = np.load(embeddings_path)

# Load the SentenceTransformer model
model = SentenceTransformer("all-MiniLM-L6-v2").to(device)

# Load the system prompt from a file
with open("prompt.txt", "r") as file:
    system_prompt = file.read()

st.title("LeetCode to Real-World Interview Question Generator")

# Initialize the chat history with the system prompt
if "messages" not in st.session_state:
    st.session_state.messages = [{"role": "system", "content": system_prompt}]

# Display chat messages from history on app rerun
for message in st.session_state.messages[1:]:  # Skip the system message
    with st.chat_message(message["role"]):
        st.markdown(message["content"])


# Find the single most similar question to the user's input
def find_top_question(query):
    # Generate an embedding for the query
    query_embedding = model.encode(query, convert_to_tensor=True, device=device).cpu().numpy()

    # Compute cosine similarity between the query embedding and all dataset
    # embeddings using scikit-learn's pairwise cosine_similarity
    similarities = cosine_similarity(query_embedding.reshape(1, -1), embeddings)[0]

    # Get the index of the most similar result (top 1)
    top_index = int(np.argmax(similarities))

    # Retrieve the metadata for the top result
    top_result = metadata.iloc[top_index].copy()
    top_result['similarity_score'] = float(similarities[top_index])
    return top_result


# Generate a response with the OpenAI API, logging the prompt for debugging
def generate_response(prompt):
    st.write("### Debugging Log: Data Sent to GPT")
    st.write(prompt)  # Log the prompt being sent to GPT

    response = client.chat.completions.create(
        model="gpt-4o",
        messages=st.session_state.messages + [{"role": "system", "content": prompt}],
    )
    return response.choices[0].message.content


# React to user input
if prompt := st.chat_input("Enter a LeetCode-related query (e.g., 'google backtracking'):"):
    # Display the user message in a chat message container
    st.chat_message("user").markdown(prompt)
    # Add the user message to the chat history
    st.session_state.messages.append({"role": "user", "content": prompt})

    # Find the top question for the user's input
    top_question = find_top_question(prompt)

    # Prepare a detailed prompt for GPT using the top question's details
    detailed_prompt = (
        f"Transform this LeetCode question into a real-world interview scenario:\n\n"
        f"**Company**: {top_question['company']}\n"
        f"**Question ID**: {top_question['questionId']}\n"
        f"**Question Name**: {top_question['questionName']}\n"
        f"**Difficulty Level**: {top_question['difficulty level']}\n"
        f"**Tags**: {top_question['Tags']}\n"
        f"**Content**: {top_question['Content']}\n"
        f"\nPlease create a real-world interview question based on this information."
    )

    # Generate a response with GPT-4o using the detailed prompt
    response = generate_response(detailed_prompt)

    # Display the assistant response in a chat message container
    with st.chat_message("assistant"):
        st.markdown(response)
    # Add the assistant response to the chat history
    st.session_state.messages.append({"role": "assistant", "content": response})

st.sidebar.markdown("""
## About
This is a LeetCode to Real-World Interview Question Generator powered by OpenAI's GPT-4o.
Enter a LeetCode-related query, and it will transform a relevant question into a real-world interview scenario!
""")
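
# Usage note: Streamlit apps are launched via the Streamlit CLI rather than the
# Python interpreter. Assuming this script is saved as app.py (the filename is
# an assumption) and OPENAI_API_KEY is exported in your shell, run:
#
#   streamlit run app.py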