shukdevdatta123 committed on
Commit
9880e71
·
verified ·
1 Parent(s): 8993c67

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -40
app.py CHANGED
@@ -4,6 +4,7 @@ import fitz # PyMuPDF
4
  import numpy as np
5
  from sklearn.metrics.pairwise import cosine_similarity
6
  from io import BytesIO
 
7
 
8
  # Function to extract text from the uploaded PDF file
9
  def extract_pdf_text(pdf_file):
@@ -24,28 +25,21 @@ def get_embeddings(texts):
24
 
25
  # Function to get the most relevant context from the PDF for the query
26
  def get_relevant_context(pdf_text, query, num_contexts=3):
27
- # Split the PDF text into chunks for better matching
28
  pdf_text_chunks = [pdf_text[i:i+1500] for i in range(0, len(pdf_text), 1500)]
29
- # Get embeddings for both the document and the query
30
  pdf_embeddings = get_embeddings(pdf_text_chunks)
31
  query_embedding = get_embeddings([query])[0]
32
-
33
- # Compute cosine similarity between query and document chunks
34
  similarities = cosine_similarity([query_embedding], pdf_embeddings)
35
  top_indices = similarities[0].argsort()[-num_contexts:][::-1]
36
-
37
- # Combine the top context pieces
38
  relevant_context = " ".join([pdf_text_chunks[i] for i in top_indices])
39
  return relevant_context
40
 
41
  # Function to generate a response from GPT-4 chat model
42
- def generate_response(context, question):
43
- messages = [
44
- {"role": "system", "content": "You are a helpful assistant expert on GPT-4."},
45
- {"role": "user", "content": f"Context: {context}\nQuestion: {question}"}
46
- ]
47
  response = openai.ChatCompletion.create(
48
- model="gpt-4o-mini", # Use the GPT-4 chat model
49
  messages=messages,
50
  max_tokens=1200,
51
  temperature=0.7,
@@ -57,44 +51,44 @@ def is_irrelevant_question(question):
57
  irrelevant_keywords = ["life", "love", "meaning", "future", "philosophy"]
58
  return any(keyword in question.lower() for keyword in irrelevant_keywords)
59
 
60
- # Streamlit UI
61
  def main():
62
- st.title("GPT-4 Research Paper Chatbot")
63
  st.write("Ask any question related to the GPT-4 paper, and I'll try to answer it!")
64
-
65
- # User input: OpenAI API key
66
- openai_api_key = st.text_input("Enter your OpenAI API Key:", type="password")
67
-
68
  if openai_api_key:
69
  openai.api_key = openai_api_key
70
  st.success("API Key successfully set!")
71
 
72
- # Upload the PDF file
73
- pdf_file = st.file_uploader("Upload GPT-4 Research Paper PDF", type="pdf")
74
-
75
  if pdf_file is not None:
76
- # Extract text from the uploaded PDF
77
- pdf_text = extract_pdf_text(pdf_file)
78
- st.write("PDF content loaded successfully!")
79
-
80
- # User input: the question they want to ask
81
- question = st.text_input("Ask your question:")
82
 
83
- if question:
84
- # Check if the question is irrelevant
 
 
 
 
 
 
 
 
85
  if is_irrelevant_question(question):
86
- st.write("Sorry, I don't know the answer to this question. I am an expert on GPT-4 knowledge.")
87
  else:
88
- # Get the most relevant context from the document
89
- relevant_context = get_relevant_context(pdf_text, question)
90
-
91
- # Generate the response from GPT-4 chat model
92
- answer = generate_response(relevant_context, question)
93
-
94
- # Display the answer
95
- st.write(f"Answer: {answer}")
96
  else:
97
- st.warning("Please enter your OpenAI API Key to use the chatbot.")
98
 
99
  if __name__ == "__main__":
100
- main()
 
4
  import numpy as np
5
  from sklearn.metrics.pairwise import cosine_similarity
6
  from io import BytesIO
7
+ import time
8
 
9
  # Function to extract text from the uploaded PDF file
10
  def extract_pdf_text(pdf_file):
 
25
 
26
# Function to get the most relevant context from the PDF for the query
def get_relevant_context(pdf_text, query, num_contexts=3, chunk_size=1500):
    """Return the chunks of ``pdf_text`` most similar to ``query``, joined by spaces.

    The text is cut into consecutive ``chunk_size``-character slices, every
    slice and the query are embedded with ``get_embeddings``, and the slices
    are ranked by cosine similarity to the query.

    Args:
        pdf_text: Full text extracted from the PDF.
        query: The user's question.
        num_contexts: Maximum number of chunks to return (best match first).
        chunk_size: Number of characters per chunk.

    Returns:
        The selected chunks joined with a single space, or ``""`` when there
        is no text to search or ``num_contexts`` is not positive.

    Raises:
        ValueError: If ``chunk_size`` is not positive.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")

    # Nothing to rank: avoid sending an empty batch to the embedding call and
    # avoid the `[-0:]` slice below, which would select *every* chunk.
    if num_contexts <= 0 or not pdf_text or not pdf_text.strip():
        return ""

    pdf_text_chunks = [
        pdf_text[start:start + chunk_size]
        for start in range(0, len(pdf_text), chunk_size)
    ]
    pdf_embeddings = get_embeddings(pdf_text_chunks)
    query_embedding = get_embeddings([query])[0]

    similarities = cosine_similarity([query_embedding], pdf_embeddings)
    # argsort is ascending, so take the tail and reverse it for best-first order.
    top_indices = similarities[0].argsort()[-num_contexts:][::-1]
    return " ".join(pdf_text_chunks[i] for i in top_indices)
35
 
36
  # Function to generate a response from GPT-4 chat model
37
+ def generate_response(context, question, chat_history):
38
+ messages = [{"role": "system", "content": "You are a helpful assistant expert on GPT-4."}]
39
+ messages.extend(chat_history)
40
+ messages.append({"role": "user", "content": f"Context: {context}\nQuestion: {question}"})
 
41
  response = openai.ChatCompletion.create(
42
+ model="gpt-4o-mini",
43
  messages=messages,
44
  max_tokens=1200,
45
  temperature=0.7,
 
51
  irrelevant_keywords = ["life", "love", "meaning", "future", "philosophy"]
52
  return any(keyword in question.lower() for keyword in irrelevant_keywords)
53
 
54
# Streamlit Chat UI
def _reset_conversation():
    """Button callback: clear the chat so the next script run starts fresh."""
    st.session_state.chat_history = []
    st.session_state.last_question = None
    st.session_state.last_response = None
    st.session_state.conversation_active = True
    # Widget values may only be changed from a callback, i.e. before the
    # widget is created again on the rerun that follows the click.
    st.session_state.question_input = ""
    st.session_state.conversation_ended = True


def _load_pdf(pdf_file):
    """Extract the PDF text once per distinct upload and reset the chat for it."""
    pdf_id = (pdf_file.name, pdf_file.size)
    if st.session_state.get("pdf_id") == pdf_id and "pdf_text" in st.session_state:
        return
    st.session_state.pdf_text = extract_pdf_text(pdf_file)
    st.session_state.pdf_id = pdf_id
    st.session_state.chat_history = []
    st.session_state.last_question = None
    st.session_state.last_response = None
    st.session_state.conversation_active = True


def _answer_question(question):
    """Return the reply for ``question`` using the PDF text and chat history in session state."""
    if is_irrelevant_question(question):
        return "Sorry, I can only answer questions related to GPT-4."
    relevant_context = get_relevant_context(st.session_state.pdf_text, question)
    return generate_response(relevant_context, question, st.session_state.chat_history)


def main():
    """Render the chatbot page: API key, PDF upload, question box and answer.

    Streamlit re-executes this function on every widget interaction, so all
    conversation data (PDF text, chat history, last answer) lives in
    ``st.session_state``.
    """
    st.title("📄 GPT-4 Research Paper Chatbot")
    st.write("Ask any question related to the GPT-4 paper, and I'll try to answer it!")

    openai_api_key = st.text_input("🔑 Enter your OpenAI API Key:", type="password")
    if not openai_api_key:
        st.warning("⚠️ Please enter your OpenAI API Key to use the chatbot.")
        return

    openai.api_key = openai_api_key
    st.success("API Key successfully set!")

    pdf_file = st.file_uploader("📂 Upload GPT-4 Research Paper PDF", type="pdf")
    if pdf_file is None:
        return

    try:
        _load_pdf(pdf_file)
    except Exception as exc:  # UI boundary: show the failure instead of a traceback
        st.error(f"❌ Could not read the PDF: {exc}")
        return

    st.write("✅ PDF content loaded successfully! Start asking questions.")
    # One-time notice set by the "End Conversation" callback.
    if st.session_state.pop("conversation_ended", False):
        st.info("👋 Conversation ended. Ask a new question to start again.")

    question = st.text_input("💬 Ask your question:", key="question_input")
    # The click itself triggers a rerun; no sleep or explicit rerun call is needed.
    st.button("🚪 End Conversation", on_click=_reset_conversation)

    if not question:
        return

    # Only call the model for a new question; unrelated reruns reuse the
    # stored answer instead of paying for (and recording) a duplicate exchange.
    if question != st.session_state.get("last_question"):
        try:
            response = _answer_question(question)
        except Exception as exc:  # UI boundary: API/network errors must not crash the page
            st.error(f"❌ Could not generate an answer: {exc}")
            return
        st.session_state.chat_history.append({"role": "user", "content": question})
        st.session_state.chat_history.append({"role": "assistant", "content": response})
        st.session_state.last_question = question
        st.session_state.last_response = response

    st.write(f"🤖 **GPT-4:** {st.session_state.last_response}")


if __name__ == "__main__":
    main()