File size: 3,930 Bytes
c0ece10
 
 
 
 
17b3855
c0ece10
17b3855
 
 
c0ece10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f394d98
c0ece10
f394d98
c0ece10
 
f394d98
 
c0ece10
 
f394d98
 
c0ece10
 
 
5d9fd64
f394d98
 
 
 
 
5d9fd64
f394d98
5d9fd64
9aba39a
c0ece10
 
8993c67
c0ece10
 
 
 
 
 
f394d98
c0ece10
9880e71
f394d98
 
 
9880e71
f394d98
c0ece10
 
 
 
f394d98
9880e71
f394d98
c0ece10
f394d98
 
9880e71
f394d98
 
 
 
 
 
c0ece10
f394d98
c0ece10
f394d98
 
 
 
 
 
 
 
c0ece10
9880e71
c0ece10
 
f394d98
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import streamlit as st
import openai
import fitz  # PyMuPDF
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from io import BytesIO

# Function to extract text from the uploaded PDF file
def extract_pdf_text(pdf_file):
    """Extract the plain text of every page of an uploaded PDF.

    Args:
        pdf_file: A file-like object (e.g. a Streamlit UploadedFile) whose
            ``read()`` returns the raw PDF bytes.

    Returns:
        str: The concatenated text of all pages, in page order.
    """
    # Use the Document as a context manager so the underlying resources are
    # released even if text extraction raises (the original leaked the handle).
    with fitz.open(stream=pdf_file.read(), filetype="pdf") as doc:
        # join() avoids the quadratic cost of repeated string concatenation.
        return "".join(page.get_text("text") for page in doc)

# Function to get embeddings for the text
def get_embeddings(texts):
    """Embed a list of strings with OpenAI's ada-002 embedding model.

    Args:
        texts: List of strings to embed (the API accepts a batch).

    Returns:
        list: One embedding vector (list of floats) per input string,
        in the same order as ``texts``.
    """
    api_result = openai.Embedding.create(
        model="text-embedding-ada-002",
        input=texts
    )
    return [item['embedding'] for item in api_result['data']]

# Function to get the most relevant context from the PDF for the query
def get_relevant_context(pdf_text, query, num_contexts=3):
    """Select the document chunks most similar to *query* by cosine similarity.

    Args:
        pdf_text: Full extracted text of the PDF.
        query: The user's question.
        num_contexts: Maximum number of top-scoring chunks to return
            (fewer if the document has fewer chunks).

    Returns:
        str: The selected chunks joined with spaces, best match first.
        Empty string when ``pdf_text`` is empty.
    """
    # Guard: an empty document would yield zero chunks, an empty embeddings
    # request, and a crash inside cosine_similarity — short-circuit instead.
    if not pdf_text:
        return ""

    # Fixed-size character windows; 1500 chars keeps each chunk well within
    # the embedding model's input limit. NOTE(review): windows don't overlap,
    # so a sentence split across a boundary may score poorly — acceptable here.
    pdf_text_chunks = [pdf_text[i:i+1500] for i in range(0, len(pdf_text), 1500)]

    # Embed the chunks and the query in the same vector space.
    pdf_embeddings = get_embeddings(pdf_text_chunks)
    query_embedding = get_embeddings([query])[0]

    # Rank chunks by cosine similarity to the query; take the top-k,
    # highest score first. argsort's tail slice handles k > len(chunks).
    similarities = cosine_similarity([query_embedding], pdf_embeddings)
    top_indices = similarities[0].argsort()[-num_contexts:][::-1]

    return " ".join([pdf_text_chunks[i] for i in top_indices])

# Function to generate a response from GPT-4 chat model
def generate_response(context, question):
    """Ask the chat model a question grounded in the retrieved context.

    Args:
        context: Document excerpts retrieved for this question.
        question: The user's question.

    Returns:
        str: The model's answer, stripped of surrounding whitespace.
    """
    user_prompt = f"Context: {context}\nQuestion: {question}"
    chat_messages = [
        {"role": "system", "content": "You are a helpful assistant expert on GPT-4."},
        {"role": "user", "content": user_prompt},
    ]
    completion = openai.ChatCompletion.create(
        model="gpt-4o-mini",
        messages=chat_messages,
        max_tokens=1200,
        temperature=0.7,
    )
    return completion['choices'][0]['message']['content'].strip()

# Function to handle irrelevant questions
def is_irrelevant_question(question):
    """Heuristic off-topic filter: flag questions about broad life topics.

    Performs a case-insensitive substring scan, so e.g. "lovely" also
    matches "love" — a deliberate, crude keyword heuristic.

    Args:
        question: The user's question.

    Returns:
        bool: True when any blocklisted keyword occurs in the question.
    """
    lowered = question.lower()
    for keyword in ("life", "love", "meaning", "future", "philosophy"):
        if keyword in lowered:
            return True
    return False

# Streamlit UI
def main():
    """Streamlit entry point: collect an API key, a PDF, and a question,
    then answer the question from the most relevant PDF excerpts."""
    st.title("📄 GPT-4 Research Paper Chatbot")
    st.write("💬 Ask any question related to the GPT-4 paper, and I'll try to answer it!")

    openai_api_key = st.text_input("🔑 Enter your OpenAI API Key:", type="password")

    # Guard clause: nothing else works without a key.
    if not openai_api_key:
        st.warning("⚠️ Please enter your OpenAI API Key to use the chatbot.")
        return

    openai.api_key = openai_api_key
    st.success("API Key successfully set!")

    pdf_file = st.file_uploader("📂 Upload GPT-4 Research Paper PDF", type="pdf")
    if pdf_file is None:
        return

    # NOTE(review): the PDF is re-parsed on every Streamlit rerun; caching
    # (st.cache_data) would avoid that — confirm before changing behavior.
    pdf_text = extract_pdf_text(pdf_file)
    st.write("✅ PDF content loaded successfully! Start asking questions.")

    question = st.text_input("Ask your question:")
    if not question:
        return

    # Cheap keyword filter before spending API calls on retrieval.
    if is_irrelevant_question(question):
        st.write("Sorry, I don't know the answer to this question. I am an expert on GPT-4 knowledge.")
        return

    relevant_context = get_relevant_context(pdf_text, question)
    answer = generate_response(relevant_context, question)
    st.write(f"🤖 Answer: {answer}")

if __name__ == "__main__":
    main()