|
import streamlit as st |
|
import openai |
|
import fitz |
|
import numpy as np |
|
from sklearn.metrics.pairwise import cosine_similarity |
|
from io import BytesIO |
|
|
|
|
|
def extract_pdf_text(pdf_file):
    """Return the plain text of every page of a PDF, concatenated in page order.

    Args:
        pdf_file: Binary file-like object whose ``read()`` returns the raw
            PDF bytes (``main`` passes the object returned by
            ``st.file_uploader``).

    Returns:
        str: The text of all pages joined with no separator added between
        pages. Empty when no page yields any text.
    """
    pdf_bytes = pdf_file.read()
    # The context manager closes the document even when a page fails to
    # extract, so the parsed PDF is not kept open after this call.
    with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
        # join() builds the result once instead of growing a string per page.
        return "".join(page.get_text("text") for page in doc)
|
|
|
|
|
def get_embeddings(texts):
    """Embed a batch of texts with the ``text-embedding-ada-002`` model.

    Args:
        texts: List of strings to embed in a single request.

    Returns:
        list: One embedding per entry of the response's ``data`` field, in
        the order the response lists them. An empty list when ``texts`` is
        empty.
    """
    # Nothing to embed: skip the network round trip instead of sending an
    # empty input to the embeddings endpoint.
    if not texts:
        return []

    response = openai.Embedding.create(
        model="text-embedding-ada-002",
        input=texts,
    )
    return [item['embedding'] for item in response['data']]
|
|
|
|
|
def get_relevant_context(pdf_text, query, num_contexts=3, chunk_size=1500):
    """Pick the chunks of ``pdf_text`` most similar to ``query``.

    The text is cut into consecutive fixed-size character chunks, every chunk
    and the query are embedded via ``get_embeddings``, and the chunks are
    ranked by cosine similarity to the query.

    Args:
        pdf_text: Full document text.
        query: The user's question.
        num_contexts: Maximum number of chunks to return.
        chunk_size: Number of characters per chunk (the last chunk may be
            shorter).

    Returns:
        str: The selected chunks, most similar first, joined by a single
        space. An empty string when ``num_contexts`` is not positive or the
        text has no non-whitespace content.

    Raises:
        ValueError: If ``chunk_size`` is not positive.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    # A "[-0:]" slice would select every chunk, so handle "no contexts
    # requested" explicitly before ranking.
    if num_contexts <= 0:
        return ""

    chunks = [
        pdf_text[start:start + chunk_size]
        for start in range(0, len(pdf_text), chunk_size)
    ]
    # Whitespace-only chunks carry nothing worth retrieving or embedding.
    chunks = [chunk for chunk in chunks if chunk.strip()]
    if not chunks:
        return ""

    chunk_embeddings = get_embeddings(chunks)
    query_embedding = get_embeddings([query])[0]

    # cosine_similarity returns a (1, n_chunks) matrix; row 0 holds the
    # score of each chunk against the query.
    scores = cosine_similarity([query_embedding], chunk_embeddings)[0]
    # argsort is ascending, so reverse it and keep the leading entries.
    top_indices = scores.argsort()[::-1][:num_contexts]

    return " ".join(chunks[i] for i in top_indices)
|
|
|
|
|
def generate_response(context, question):
    """Ask the chat model to answer ``question`` given ``context``.

    Args:
        context: Text placed in the prompt as supporting material.
        question: The user's question.

    Returns:
        str: The content of the first returned choice, with leading and
        trailing whitespace stripped.
    """
    system_turn = {"role": "system", "content": "You are a helpful assistant expert on GPT-4."}
    user_turn = {"role": "user", "content": f"Context: {context}\nQuestion: {question}"}

    completion = openai.ChatCompletion.create(
        model="gpt-4o-mini",
        messages=[system_turn, user_turn],
        max_tokens=1200,
        temperature=0.7,
    )

    first_choice = completion['choices'][0]
    return first_choice['message']['content'].strip()
|
|
|
|
|
def is_irrelevant_question(question):
    """Report whether ``question`` contains one of the off-topic keywords.

    Matching is case-insensitive and on whole words, so on-topic words that
    merely contain a keyword ("lifecycle", "meaningful", "gloves") are not
    flagged. Derived forms of a keyword (e.g. "philosophical") are not
    matched either.

    Args:
        question: The user's question text.

    Returns:
        bool: True if any keyword occurs as a standalone word.
    """
    import re  # local import: the module's import block does not provide re

    irrelevant_keywords = {"life", "love", "meaning", "future", "philosophy"}
    # Split into alphabetic words so punctuation ("life?") does not hide a match.
    words = re.findall(r"[a-z]+", question.lower())
    return not irrelevant_keywords.isdisjoint(words)
|
|
|
|
|
def main():
    """Render the Streamlit chatbot page and answer questions about a PDF.

    The page advances in stages, each shown only once the previous one is
    satisfied: OpenAI API key, PDF upload, then the question box. A question
    flagged by ``is_irrelevant_question`` gets a fixed refusal; any other
    question is answered from the most relevant chunks of the PDF text.
    Failures while reading the PDF or calling OpenAI are reported on the page
    instead of propagating.
    """
    st.title("📄 GPT-4 Research Paper Chatbot")
    st.write("💬 Ask any question related to the GPT-4 paper, and I'll try to answer it!")

    openai_api_key = st.text_input("🔑 Enter your OpenAI API Key:", type="password")
    if not openai_api_key:
        st.warning("⚠️ Please enter your OpenAI API Key to use the chatbot.")
        return

    openai.api_key = openai_api_key
    st.success("API Key successfully set!")

    pdf_file = st.file_uploader("📂 Upload GPT-4 Research Paper PDF", type="pdf")
    if pdf_file is None:
        return

    # UI boundary: a corrupt or unreadable upload becomes a readable message.
    try:
        pdf_text = extract_pdf_text(pdf_file)
    except Exception as exc:
        st.error(f"Could not read the uploaded PDF: {exc}")
        return

    # Without any text there is nothing to retrieve context from.
    if not pdf_text.strip():
        st.warning("No extractable text was found in this PDF.")
        return

    st.write("✅ PDF content loaded successfully! Start asking questions.")

    # Strip so a whitespace-only entry is treated as "no question yet".
    question = st.text_input("Ask your question:").strip()
    if not question:
        return

    if is_irrelevant_question(question):
        st.write("Sorry, I don't know the answer to this question. I am an expert on GPT-4 knowledge.")
        return

    # UI boundary: network, authentication and quota errors from the OpenAI
    # calls are shown to the user rather than crashing the script run.
    try:
        relevant_context = get_relevant_context(pdf_text, question)
        answer = generate_response(relevant_context, question)
    except Exception as exc:
        st.error(f"Could not generate an answer: {exc}")
        return

    st.write(f"🤖 Answer: {answer}")
|
|
|
# Script entry point: build the page only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()