import streamlit as st
import openai
import fitz  # PyMuPDF
import time
from sklearn.metrics.pairwise import cosine_similarity


# Extract all text from the uploaded PDF file using PyMuPDF
def extract_pdf_text(pdf_file):
    doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
    text = ""
    for page in doc:
        text += page.get_text("text")
    doc.close()
    return text


# Get embeddings for a list of texts from the OpenAI Embeddings API
def get_embeddings(texts):
    response = openai.Embedding.create(
        model="text-embedding-ada-002",
        input=texts,
    )
    return [item["embedding"] for item in response["data"]]


# Retrieve the most relevant PDF chunks for the query via cosine similarity
def get_relevant_context(pdf_text, query, num_contexts=3):
    pdf_text_chunks = [pdf_text[i:i + 1500] for i in range(0, len(pdf_text), 1500)]
    pdf_embeddings = get_embeddings(pdf_text_chunks)
    query_embedding = get_embeddings([query])[0]
    similarities = cosine_similarity([query_embedding], pdf_embeddings)
    top_indices = similarities[0].argsort()[-num_contexts:][::-1]
    return " ".join(pdf_text_chunks[i] for i in top_indices)


# Generate an answer from the OpenAI chat model, grounded in the retrieved context
def generate_response(context, question, chat_history):
    messages = [{"role": "system", "content": "You are a helpful assistant and an expert on the GPT-4 research paper."}]
    messages.extend(chat_history)
    messages.append({"role": "user", "content": f"Context: {context}\nQuestion: {question}"})
    response = openai.ChatCompletion.create(
        model="gpt-4o-mini",
        messages=messages,
        max_tokens=1200,
        temperature=0.7,
    )
    return response["choices"][0]["message"]["content"].strip()


# Simple keyword filter for off-topic questions
def is_irrelevant_question(question):
    irrelevant_keywords = ["life", "love", "meaning", "future", "philosophy"]
    return any(keyword in question.lower() for keyword in irrelevant_keywords)


# Streamlit chat UI
def main():
    st.title("📄 GPT-4 Research Paper Chatbot")
    st.write("Ask any question related to the GPT-4 paper, and I'll try to answer it!")

    openai_api_key = st.text_input("🔑 Enter your OpenAI API Key:", type="password")
    if openai_api_key:
        openai.api_key = openai_api_key
        st.success("API Key successfully set!")

        pdf_file = st.file_uploader("📂 Upload GPT-4 Research Paper PDF", type="pdf")
        if pdf_file is not None:
            # Extract the PDF text once and keep it (plus chat state) across reruns
            if "pdf_text" not in st.session_state:
                st.session_state.pdf_text = extract_pdf_text(pdf_file)
                st.session_state.chat_history = []
                st.session_state.conversation_active = True
            st.write("✅ PDF content loaded successfully! Start asking questions.")

            question = st.text_input("💬 Ask your question:")

            if st.button("🚪 End Conversation"):
                st.write("👋 Conversation ended. Refreshing...")
                st.session_state.conversation_active = False
                time.sleep(2)
                st.rerun()

            if question and st.session_state.conversation_active:
                if is_irrelevant_question(question):
                    response = "Sorry, I can only answer questions related to GPT-4."
                else:
                    relevant_context = get_relevant_context(st.session_state.pdf_text, question)
                    response = generate_response(relevant_context, question, st.session_state.chat_history)

                st.session_state.chat_history.append({"role": "user", "content": question})
                st.session_state.chat_history.append({"role": "assistant", "content": response})

                st.write(f"🤖 **GPT-4:** {response}")
    else:
        st.warning("⚠️ Please enter your OpenAI API Key to use the chatbot.")


if __name__ == "__main__":
    main()
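
# ---------------------------------------------------------------------------
# Usage sketch (assumptions, not part of the app above: the file name app.py
# and the pinned install below). The script calls the pre-1.0 OpenAI Python
# SDK interface (openai.Embedding.create / openai.ChatCompletion.create), so
# a matching setup would look roughly like:
#
#   pip install "openai<1.0" streamlit pymupdf scikit-learn
#   streamlit run app.py
#
# Then open the local URL Streamlit prints, paste an OpenAI API key, upload
# the GPT-4 technical report PDF, and ask questions in the text box.
# ---------------------------------------------------------------------------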