import gradio as gr
from sentence_transformers import SentenceTransformer, util
import openai
import os

os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Initialize paths and model identifiers for easy configuration and maintenance
filename = "output_topic_details.txt"  # Path to the file storing college and scholarship details
retrieval_model_name = 'output/sentence-transformer-finetuned/'

openai.api_key = os.environ["OPENAI_API_KEY"]

# Initial system message to set the behavior of the assistant
system_message = "You are a college chatbot specialized in providing information on colleges, scholarships, and mentors."
messages = [{"role": "system", "content": system_message}]

# Attempt to load the retrieval model and provide feedback on success or failure
try:
    retrieval_model = SentenceTransformer(retrieval_model_name)
    print("Model loaded successfully.")
except Exception as e:
    print(f"Failed to load model: {e}")


def load_and_preprocess_text(filename):
    """
    Load and preprocess text from a file, removing empty lines and stripping whitespace.
    """
    try:
        with open(filename, 'r', encoding='utf-8') as file:
            segments = [line.strip() for line in file if line.strip()]
        print("Text loaded and preprocessed successfully.")
        return segments
    except Exception as e:
        print(f"Failed to load or preprocess text: {e}")
        return []


segments = load_and_preprocess_text(filename)


def find_relevant_segment(user_query, segments):
    """
    Find the most relevant text segment for a user's query using cosine similarity
    among sentence embeddings. This version finds the best match based on the content of the query.
    """
    try:
        # Lowercase the query for better matching
        lower_query = user_query.lower()

        # Encode the query and the segments
        query_embedding = retrieval_model.encode(lower_query)
        segment_embeddings = retrieval_model.encode(segments)

        # Compute cosine similarities between the query and the segments
        similarities = util.pytorch_cos_sim(query_embedding, segment_embeddings)[0]

        # Find the index of the most similar segment
        best_idx = similarities.argmax()

        # Return the most relevant segment
        return segments[best_idx]
    except Exception as e:
        print(f"Error in finding relevant segment: {e}")
        return ""


def generate_response(user_query, relevant_segment):
    """
    Generate a response by sending the user's question together with the retrieved
    segment to the OpenAI chat model.
    """
    try:
        # Include both the question and the retrieved context so the model can answer the query
        user_message = f"{user_query}\n\nHere's what I found about that: {relevant_segment}"
        messages.append({"role": "user", "content": user_message})

        response = openai.ChatCompletion.create(
            model="gpt-4o",
            messages=messages,
            max_tokens=500,  # can try increasing this if responses are cut off
            temperature=0.5,
            top_p=1,
            frequency_penalty=0.5,
            presence_penalty=0.5,
        )

        output_text = response['choices'][0]['message']['content'].strip()

        # Keep the assistant's reply in the conversation history for follow-up questions
        messages.append({"role": "assistant", "content": output_text})
        return output_text
    except Exception as e:
        print(f"Error in generating response: {e}")
        return f"Error in generating response: {e}"


def query_model(question):
    """
    Process a question, find relevant information, and generate a response.
    """
    if question == "":
        return "This is ScholarSage! Ask me anything about college or scholarships!"
    relevant_segment = find_relevant_segment(question, segments)
    if not relevant_segment:
        return "Sorry, that's not a spell I know of D: I couldn't find the information! Please refine your question."
    response = generate_response(question, relevant_segment)
    return response


# Define the welcome message and specific topics the chatbot can provide information about
welcome_message = """
# 🪄 Welcome to ScholarSage! 🧙‍♀️
## An AI-driven wizard for all college-related queries!
Created by Sadia, Jinny, and Kristy of the 2024 Kode With Klossy NYC Camp.
""" topics = """ ### Feel Free to ask me anything from the topics below! Reminder that I can only summon info about NY colleges and CS majors. Sorry! - College - Scholarships """ subtopics = """ ### Focus questions on these subtopics: - List of Colleges in NYS 1. best colleges for CS 2. private 3. public 4. ivy leagues - List of Scholarships 1. low income student friendly 2. specific to a certain college 3. national scholarships """ def display_image(): return "https://huggingface.co/spaces/scholar-sage/Scholar-Sage/resolve/main/Screenshot%202024-08-01%20at%203.04.19%E2%80%AFPM.png" theme = gr.themes.Soft( primary_hue="amber", secondary_hue="rose", neutral_hue="rose", ).set( body_text_color='*neutral_500', background_fill_primary='*primary_50', border_color_primary='*secondary_400', block_background_fill='*background_fill_primary', block_border_width='1px', block_border_width_dark='1px', block_label_background_fill='*background_fill_primary', block_label_background_fill_dark='*background_fill_secondary', block_label_text_color='*neutral_500', block_label_text_color_dark='*neutral_200', block_label_margin='0', block_label_padding='*spacing_sm *spacing_lg', block_label_radius='calc(*radius_lg - 1px) 0 calc(*radius_lg - 1px) 0', block_label_text_size='*text_sm', block_label_text_weight='400', block_title_background_fill='none', block_title_background_fill_dark='none', block_title_text_color='*neutral_500', block_title_text_color_dark='*neutral_200', block_title_padding='0', block_title_radius='none', block_title_text_weight='400', panel_border_width='0', panel_border_width_dark='0', input_background_fill='*neutral_100', input_border_color='*border_color_primary', input_shadow='none', input_shadow_dark='none', input_shadow_focus='*input_shadow', input_shadow_focus_dark='*input_shadow', slider_color='#2563eb', slider_color_dark='#2563eb', button_shadow='none', button_shadow_active='none', button_shadow_hover='none', button_primary_background_fill='*primary_200', button_primary_background_fill_hover='*button_primary_background_fill', button_primary_background_fill_hover_dark='*button_primary_background_fill', button_primary_text_color='*primary_600', button_secondary_background_fill='*neutral_200', button_secondary_background_fill_hover='*button_secondary_background_fill', button_secondary_background_fill_hover_dark='*button_secondary_background_fill', button_secondary_text_color='*neutral_700', button_cancel_background_fill_hover='*button_cancel_background_fill', button_cancel_background_fill_hover_dark='*button_cancel_background_fill' ) # Setup the Gradio Blocks interface with custom layout components with gr.Blocks(theme=theme) as demo: gr.Image(display_image(), container = False, show_share_button = False, show_download_button = False, label="output", show_label=True, elem_id="output_image") gr.Markdown(welcome_message) # Display the formatted welcome message with gr.Row(): with gr.Column(): gr.Markdown(topics) # Show the topics on the left side gr.Markdown(subtopics) with gr.Row(): with gr.Column(): question = gr.Textbox(label="Your question", placeholder="What do you want to ask about?") answer = gr.Textbox(label="ScholarSage Response", placeholder="ScholarSage will respond here...", interactive=False, lines=10) submit_button = gr.Button("Submit") submit_button.click(fn=query_model, inputs=question, outputs=answer) demo.launch() # Launch the Gradio app to allow user interaction demo.launch(share=True)