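"""Streamlit chatbot that answers questions about the GPT-4 research paper.

The app extracts text from an uploaded PDF, embeds fixed-size chunks with
OpenAI's text-embedding-ada-002 model, retrieves the chunks most similar to
the user's question via cosine similarity, and asks a chat model to answer
using that context.

Assumed dependencies (inferred from the imports below): streamlit, openai<1.0
(legacy Embedding/ChatCompletion interface), PyMuPDF (imported as fitz), and
scikit-learn. Launch with `streamlit run <this file>` from a terminal.
"""
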
import streamlit as st
import openai  # NOTE: this script uses the legacy openai<1.0 interface (openai.Embedding / openai.ChatCompletion)
import fitz  # PyMuPDF
from sklearn.metrics.pairwise import cosine_similarity
import time


def extract_pdf_text(pdf_file):
    """Extract plain text from every page of the uploaded PDF."""
    doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
    text = ""
    for page in doc:
        text += page.get_text("text")
    doc.close()
    return text


def get_embeddings(texts):
    """Embed a list of strings with OpenAI's text-embedding-ada-002 model."""
    response = openai.Embedding.create(
        model="text-embedding-ada-002",
        input=texts,
    )
    embeddings = [item["embedding"] for item in response["data"]]
    return embeddings


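# Illustrative usage (hypothetical inputs, not part of the app flow): with the legacy
# SDK each item in response["data"] behaves like a dict, and ada-002 embeddings are
# 1536-dimensional.
#   vectors = get_embeddings(["first chunk", "second chunk"])
#   assert len(vectors) == 2 and len(vectors[0]) == 1536
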
def get_relevant_context(pdf_text, query, num_contexts=3):
    """Return the num_contexts PDF chunks most similar to the query."""
    # Split the PDF text into fixed-size 1500-character chunks.
    pdf_text_chunks = [pdf_text[i:i + 1500] for i in range(0, len(pdf_text), 1500)]
    pdf_embeddings = get_embeddings(pdf_text_chunks)
    query_embedding = get_embeddings([query])[0]
    # Rank chunks by cosine similarity to the query and keep the top matches.
    similarities = cosine_similarity([query_embedding], pdf_embeddings)
    top_indices = similarities[0].argsort()[-num_contexts:][::-1]
    relevant_context = " ".join([pdf_text_chunks[i] for i in top_indices])
    return relevant_context


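# Note: as written, get_relevant_context re-embeds every PDF chunk on each question.
# A possible (untested) optimization, using a hypothetical session-state key, would be
# to compute the chunk embeddings once and reuse them, e.g.:
#   if "pdf_embeddings" not in st.session_state:
#       st.session_state.pdf_embeddings = get_embeddings(pdf_text_chunks)
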
def generate_response(context, question, chat_history):
    """Ask the chat model the question, grounded in the retrieved PDF context."""
    messages = [{"role": "system", "content": "You are a helpful assistant and an expert on GPT-4."}]
    messages.extend(chat_history)
    messages.append({"role": "user", "content": f"Context: {context}\nQuestion: {question}"})
    response = openai.ChatCompletion.create(
        model="gpt-4o-mini",
        messages=messages,
        max_tokens=1200,
        temperature=0.7,
    )
    return response["choices"][0]["message"]["content"].strip()


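# For reference, the messages list sent above has roughly this shape (illustrative values):
#   [{"role": "system", "content": "You are a helpful assistant ..."},
#    <prior user/assistant turns from chat_history>,
#    {"role": "user", "content": "Context: <top PDF chunks>\nQuestion: <user question>"}]
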
def is_irrelevant_question(question):
    """Crude keyword filter for questions that are clearly off-topic."""
    irrelevant_keywords = ["life", "love", "meaning", "future", "philosophy"]
    return any(keyword in question.lower() for keyword in irrelevant_keywords)


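# Example: is_irrelevant_question("What is the meaning of life?") returns True.
# The check is purely keyword-based, so an on-topic question that happens to contain
# a listed word (e.g. "future work") would also be filtered.
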
def main():
    st.title("📄 GPT-4 Research Paper Chatbot")
    st.write("Ask any question related to the GPT-4 paper, and I'll try to answer it!")

    openai_api_key = st.text_input("🔑 Enter your OpenAI API Key:", type="password")
    if openai_api_key:
        openai.api_key = openai_api_key
        st.success("API Key successfully set!")

        pdf_file = st.file_uploader("📂 Upload GPT-4 Research Paper PDF", type="pdf")
        if pdf_file is not None:
            # Parse the PDF once and keep it (plus the chat history) across reruns.
            if "pdf_text" not in st.session_state:
                st.session_state.pdf_text = extract_pdf_text(pdf_file)
                st.session_state.chat_history = []
                st.session_state.conversation_active = True

            st.write("✅ PDF content loaded successfully! Start asking questions.")
            question = st.text_input("💬 Ask your question:")

            if st.button("🚪 End Conversation"):
                st.write("👋 Conversation ended. Refreshing...")
                st.session_state.conversation_active = False
                # Clear the cached PDF text and history so the rerun starts a fresh conversation.
                st.session_state.pop("pdf_text", None)
                st.session_state.pop("chat_history", None)
                time.sleep(2)
                st.rerun()

            if question and st.session_state.conversation_active:
                if is_irrelevant_question(question):
                    response = "Sorry, I can only answer questions related to GPT-4."
                else:
                    relevant_context = get_relevant_context(st.session_state.pdf_text, question)
                    response = generate_response(relevant_context, question, st.session_state.chat_history)
                    st.session_state.chat_history.append({"role": "user", "content": question})
                    st.session_state.chat_history.append({"role": "assistant", "content": response})

                st.write(f"🤖 **GPT-4:** {response}")
    else:
        st.warning("⚠️ Please enter your OpenAI API Key to use the chatbot.")


if __name__ == "__main__":
    main()