File size: 4,000 Bytes
c0ece10
 
 
 
 
17b3855
9880e71
c0ece10
17b3855
 
 
c0ece10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5d9fd64
9880e71
 
 
 
5d9fd64
9880e71
5d9fd64
9aba39a
c0ece10
 
8993c67
c0ece10
 
 
 
 
 
9880e71
c0ece10
9880e71
c0ece10
9880e71
 
c0ece10
 
 
 
9880e71
c0ece10
9880e71
 
 
 
c0ece10
9880e71
 
 
 
 
 
 
f7e33a9
9880e71
 
c0ece10
9880e71
c0ece10
9880e71
 
 
 
 
 
c0ece10
9880e71
c0ece10
 
9880e71
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import re
import time
from io import BytesIO

import fitz  # PyMuPDF
import numpy as np
import openai
import streamlit as st
from sklearn.metrics.pairwise import cosine_similarity

# Function to extract text from the uploaded PDF file
def extract_pdf_text(pdf_file):
    """Extract the plain text of every page from an uploaded PDF.

    Args:
        pdf_file: File-like object (e.g. a Streamlit upload) whose
            ``read()`` returns the raw PDF bytes.

    Returns:
        str: Text of all pages concatenated in page order.
    """
    doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
    try:
        # join() avoids quadratic += concatenation on large documents.
        return "".join(page.get_text("text") for page in doc)
    finally:
        doc.close()  # release PyMuPDF resources even if extraction fails

# Function to get embeddings for the text
def get_embeddings(texts):
    """Embed each input string with OpenAI's ada-002 embedding model.

    Args:
        texts: List of strings to embed (the API accepts a batch).

    Returns:
        List of embedding vectors, one list of floats per input string,
        in the same order as *texts*.
    """
    api_result = openai.Embedding.create(
        model="text-embedding-ada-002",
        input=texts
    )
    vectors = []
    for item in api_result['data']:
        vectors.append(item['embedding'])
    return vectors

# Function to get the most relevant context from the PDF for the query
def get_relevant_context(pdf_text, query, num_contexts=3, chunk_size=1500):
    """Return the PDF chunks most similar to *query* by embedding cosine similarity.

    Args:
        pdf_text: Full extracted text of the PDF.
        query: The user's question.
        num_contexts: How many top-ranked chunks to include.
        chunk_size: Character length of each chunk (was hard-coded to 1500).

    Returns:
        str: The top chunks joined with spaces, best match first; empty
        string when *pdf_text* is empty.
    """
    pdf_text_chunks = [pdf_text[i:i + chunk_size]
                       for i in range(0, len(pdf_text), chunk_size)]
    if not pdf_text_chunks:
        # Empty PDF text: avoid calling the embedding API with an empty batch.
        return ""
    pdf_embeddings = get_embeddings(pdf_text_chunks)
    query_embedding = get_embeddings([query])[0]
    similarities = cosine_similarity([query_embedding], pdf_embeddings)
    # argsort ascending -> take the last num_contexts, reversed so best is first.
    top_indices = similarities[0].argsort()[-num_contexts:][::-1]
    relevant_context = " ".join(pdf_text_chunks[i] for i in top_indices)
    return relevant_context

# Function to generate a response from GPT-4 chat model
def generate_response(context, question, chat_history):
    """Ask the chat model *question*, grounded in *context* and prior turns.

    Args:
        context: Retrieved PDF text to ground the answer in.
        question: The user's current question.
        chat_history: Prior messages as a list of role/content dicts.

    Returns:
        str: The model's reply, stripped of surrounding whitespace.
    """
    system_turn = {"role": "system", "content": "You are a helpful assistant expert on GPT-4."}
    user_turn = {"role": "user", "content": f"Context: {context}\nQuestion: {question}"}
    # System prompt first, then the running history, then the new grounded question.
    messages = [system_turn, *chat_history, user_turn]
    completion = openai.ChatCompletion.create(
        model="gpt-4o-mini",
        messages=messages,
        max_tokens=1200,
        temperature=0.7,
    )
    return completion['choices'][0]['message']['content'].strip()

# Function to handle irrelevant questions
def is_irrelevant_question(question):
    """Return True when *question* contains an off-topic keyword as a whole word.

    The previous substring check flagged innocent words that merely contain
    a keyword (e.g. "lifecycle" contains "life"); matching on word
    boundaries avoids those false positives.

    Args:
        question: The user's question text.

    Returns:
        bool: True if any off-topic keyword appears as a standalone word.
    """
    irrelevant_keywords = {"life", "love", "meaning", "future", "philosophy"}
    words = re.findall(r"[a-z']+", question.lower())
    return any(word in irrelevant_keywords for word in words)

# Streamlit Chat UI
def main():
    """Run the Streamlit chat loop: collect an API key, load a PDF, answer questions."""
    st.title("📄 GPT-4 Research Paper Chatbot")
    st.write("Ask any question related to the GPT-4 paper, and I'll try to answer it!")

    # Nothing below runs until the user supplies an API key this session.
    openai_api_key = st.text_input("🔑 Enter your OpenAI API Key:", type="password")
    if openai_api_key:
        openai.api_key = openai_api_key  # configures the module-global openai client
        st.success("API Key successfully set!")

        pdf_file = st.file_uploader("📂 Upload GPT-4 Research Paper PDF", type="pdf")
        if pdf_file is not None:
            # Extract text once per session; Streamlit reruns reuse the cached copy.
            # NOTE(review): uploading a *different* PDF later will not re-extract,
            # because the guard only checks that the key exists — confirm intended.
            if "pdf_text" not in st.session_state:
                st.session_state.pdf_text = extract_pdf_text(pdf_file)
                st.session_state.chat_history = []
                st.session_state.conversation_active = True

            st.write("✅ PDF content loaded successfully! Start asking questions.")
            question = st.text_input("💬 Ask your question:")
            
            if st.button("🚪 End Conversation"):
                # Mark the session inactive, pause so the farewell is visible,
                # then rerun (session_state persists, so the flag survives).
                st.write("👋 Conversation ended. Refreshing...")
                st.session_state.conversation_active = False
                time.sleep(2)
                st.rerun()
            
            if question and st.session_state.conversation_active:
                if is_irrelevant_question(question):
                    # Off-topic questions get a canned reply and stay out of history.
                    response = "Sorry, I can only answer questions related to GPT-4."
                else:
                    # Retrieve the best-matching chunks, then answer with history;
                    # both turns are appended so follow-ups keep context.
                    relevant_context = get_relevant_context(st.session_state.pdf_text, question)
                    response = generate_response(relevant_context, question, st.session_state.chat_history)
                    st.session_state.chat_history.append({"role": "user", "content": question})
                    st.session_state.chat_history.append({"role": "assistant", "content": response})
                
                st.write(f"🤖 **GPT-4:** {response}")
    else:
        st.warning("⚠️ Please enter your OpenAI API Key to use the chatbot.")

# Script entry point — guard keeps imports side-effect free.
if __name__ == "__main__":
    main()