import streamlit as st
import openai
import fitz  # PyMuPDF
from sklearn.metrics.pairwise import cosine_similarity
import time
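
# NOTE: the OpenAI calls below use the legacy module-level interface
# (openai.Embedding.create / openai.ChatCompletion.create), which requires
# the pre-1.0 openai package. With openai>=1.0 the equivalents are
# client.embeddings.create(...) and client.chat.completions.create(...) on
# an openai.OpenAI() client.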

# Function to extract text from the uploaded PDF file
def extract_pdf_text(pdf_file):
    doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
    text = ""
    for page in doc:
        text += page.get_text("text")
    doc.close()
    return text

# Function to get embeddings for the text
def get_embeddings(texts):
    response = openai.Embedding.create(
        model="text-embedding-ada-002",
        input=texts
    )
    embeddings = [item['embedding'] for item in response['data']]
    return embeddings
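
# Example (illustrative): get_embeddings(["hello world"]) returns a list
# containing one 1536-dimensional vector (the output size of
# text-embedding-ada-002).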

# Function to get the most relevant context from the PDF for the query
def get_relevant_context(pdf_text, query, num_contexts=3):
    # Split the PDF text into fixed-size 1500-character chunks
    pdf_text_chunks = [pdf_text[i:i+1500] for i in range(0, len(pdf_text), 1500)]
    pdf_embeddings = get_embeddings(pdf_text_chunks)
    query_embedding = get_embeddings([query])[0]
    # Rank chunks by cosine similarity to the query and keep the top matches
    similarities = cosine_similarity([query_embedding], pdf_embeddings)
    top_indices = similarities[0].argsort()[-num_contexts:][::-1]
    relevant_context = " ".join([pdf_text_chunks[i] for i in top_indices])
    return relevant_context
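
# The fixed 1500-character chunks are a simple heuristic: they stay well under
# text-embedding-ada-002's 8,191-token input limit, but they can split a
# sentence across two chunks. Overlapping chunk boundaries would reduce the
# risk of cutting a relevant passage in half.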

# Function to generate a response from the OpenAI chat model
def generate_response(context, question, chat_history):
    messages = [{"role": "system", "content": "You are a helpful assistant expert on GPT-4."}]
    messages.extend(chat_history)
    messages.append({"role": "user", "content": f"Context: {context}\nQuestion: {question}"})
    response = openai.ChatCompletion.create(
        model="gpt-4o-mini",
        messages=messages,
        max_tokens=1200,
        temperature=0.7,
    )
    return response['choices'][0]['message']['content'].strip()

# Function to detect off-topic questions with a simple keyword check
def is_irrelevant_question(question):
    irrelevant_keywords = ["life", "love", "meaning", "future", "philosophy"]
    return any(keyword in question.lower() for keyword in irrelevant_keywords)
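
# Example: is_irrelevant_question("What is the meaning of life?") -> True
# (matches "meaning" and "life"). Note the check is keyword-based, so on-topic
# questions that happen to contain one of these words are also rejected.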

# Streamlit Chat UI
def main():
    st.title("📄 GPT-4 Research Paper Chatbot")
    st.write("Ask any question related to the GPT-4 paper, and I'll try to answer it!")
    openai_api_key = st.text_input("🔑 Enter your OpenAI API Key:", type="password")
    if openai_api_key:
        openai.api_key = openai_api_key
        st.success("API Key successfully set!")
        pdf_file = st.file_uploader("📂 Upload GPT-4 Research Paper PDF", type="pdf")
        if pdf_file is not None:
            # Extract and cache the PDF text once per session
            if "pdf_text" not in st.session_state:
                st.session_state.pdf_text = extract_pdf_text(pdf_file)
                st.session_state.chat_history = []
                st.session_state.conversation_active = True
            st.write("✅ PDF content loaded successfully! Start asking questions.")
            question = st.text_input("💬 Ask your question:")
            if st.button("🚪 End Conversation"):
                st.write("👋 Conversation ended. Refreshing...")
                st.session_state.conversation_active = False
                time.sleep(2)
                st.rerun()
            if question and st.session_state.conversation_active:
                if is_irrelevant_question(question):
                    response = "Sorry, I can only answer questions related to GPT-4."
                else:
                    relevant_context = get_relevant_context(st.session_state.pdf_text, question)
                    response = generate_response(relevant_context, question, st.session_state.chat_history)
                st.session_state.chat_history.append({"role": "user", "content": question})
                st.session_state.chat_history.append({"role": "assistant", "content": response})
                st.write(f"🤖 **GPT-4:** {response}")
    else:
        st.warning("⚠️ Please enter your OpenAI API Key to use the chatbot.")

if __name__ == "__main__":
    main()
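
# To launch the app locally (assuming this file is saved as app.py):
#   streamlit run app.py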