Spaces:

shukdevdatta123
/

GPT-4-Research-Paper-Chatbot

Sleeping

App Files Files Community

shukdevdatta123 commited on Feb 27

Commit

c0ece10

verified ·

1 Parent(s): 2f9b822

Create app.py

Browse files

Files changed (1) hide show

app.py +96 -0

app.py ADDED Viewed

	@@ -0,0 +1,96 @@

+import streamlit as st
+import openai
+import fitz  # PyMuPDF
+import numpy as np
+from sklearn.metrics.pairwise import cosine_similarity
+# Function to extract text from the PDF file
+def extract_pdf_text(pdf_file_path):
+    doc = fitz.open(pdf_file_path)
+    text = ""
+    for page in doc:
+        text += page.get_text("text")
+    return text
+# Function to get embeddings for the text
+def get_embeddings(texts):
+    response = openai.Embedding.create(
+        model="text-embedding-ada-002",
+        input=texts
+    )
+    embeddings = [embedding['embedding'] for embedding in response['data']]
+    return embeddings
+# Function to get the most relevant context from the PDF for the query
+def get_relevant_context(pdf_text, query, num_contexts=3):
+    # Split the PDF text into chunks for better matching
+    pdf_text_chunks = [pdf_text[i:i+1500] for i in range(0, len(pdf_text), 1500)]
+    # Get embeddings for both the document and the query
+    pdf_embeddings = get_embeddings(pdf_text_chunks)
+    query_embedding = get_embeddings([query])[0]
+    # Compute cosine similarity between query and document chunks
+    similarities = cosine_similarity([query_embedding], pdf_embeddings)
+    top_indices = similarities[0].argsort()[-num_contexts:][::-1]
+    # Combine the top context pieces
+    relevant_context = " ".join([pdf_text_chunks[i] for i in top_indices])
+    return relevant_context
+# Function to generate a response from GPT-4o-mini model
+def generate_response(context, question):
+    prompt = f"The following is an excerpt from a research paper on GPT-4. Please answer the question based on this context:\n\nContext: {context}\n\nQuestion: {question}\nAnswer:"
+    response = openai.Completion.create(
+        model="gpt-4o-mini",  # Replace with the appropriate model identifier
+        prompt=prompt,
+        max_tokens=200,
+        temperature=0.7,
+    )
+    return response.choices[0].text.strip()
+# Function to handle irrelevant questions
+def is_irrelevant_question(question):
+    irrelevant_keywords = ["life", "love", "meaning", "future", "philosophy"]
+    return any(keyword in question.lower() for keyword in irrelevant_keywords)
+# Streamlit UI
+def main():
+    st.title("GPT-4 Research Paper Chatbot")
+    st.write("Ask any question related to the GPT-4 paper, and I'll try to answer it!")
+    # User input: OpenAI API key
+    openai_api_key = st.text_input("Enter your OpenAI API Key:", type="password")
+    if openai_api_key:
+        openai.api_key = openai_api_key
+        st.success("API Key successfully set!")
+        # Upload the PDF file
+        pdf_file = st.file_uploader("Upload GPT-4 Research Paper PDF", type="pdf")
+        if pdf_file is not None:
+            # Extract text from the uploaded PDF
+            pdf_text = extract_pdf_text(pdf_file)
+            st.write("PDF content loaded successfully!")
+            # User input: the question they want to ask
+            question = st.text_input("Ask your question:")
+            if question:
+                # Check if the question is irrelevant
+                if is_irrelevant_question(question):
+                    st.write("Sorry, I don't know the answer to this question. I am an expert on GPT-4 knowledge.")
+                else:
+                    # Get the most relevant context from the document
+                    relevant_context = get_relevant_context(pdf_text, question)
+                    # Generate the response from GPT-4o-mini
+                    answer = generate_response(relevant_context, question)
+                    # Display the answer
+                    st.write(f"Answer: {answer}")
+    else:
+        st.warning("Please enter your OpenAI API Key to use the chatbot.")
+if __name__ == "__main__":
+    main()