shukdevdatta123 committed on
Commit
f394d98
·
verified ·
1 Parent(s): f7e33a9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -31
app.py CHANGED
@@ -4,7 +4,6 @@ import fitz # PyMuPDF
4
  import numpy as np
5
  from sklearn.metrics.pairwise import cosine_similarity
6
  from io import BytesIO
7
- import time
8
 
9
  # Function to extract text from the uploaded PDF file
10
  def extract_pdf_text(pdf_file):
@@ -25,21 +24,28 @@ def get_embeddings(texts):
25
 
26
  # Function to get the most relevant context from the PDF for the query
27
  def get_relevant_context(pdf_text, query, num_contexts=3):
 
28
  pdf_text_chunks = [pdf_text[i:i+1500] for i in range(0, len(pdf_text), 1500)]
 
29
  pdf_embeddings = get_embeddings(pdf_text_chunks)
30
  query_embedding = get_embeddings([query])[0]
 
 
31
  similarities = cosine_similarity([query_embedding], pdf_embeddings)
32
  top_indices = similarities[0].argsort()[-num_contexts:][::-1]
 
 
33
  relevant_context = " ".join([pdf_text_chunks[i] for i in top_indices])
34
  return relevant_context
35
 
36
  # Function to generate a response from GPT-4 chat model
37
- def generate_response(context, question, chat_history):
38
- messages = [{"role": "system", "content": "You are a helpful assistant expert on GPT-4."}]
39
- messages.extend(chat_history)
40
- messages.append({"role": "user", "content": f"Context: {context}\nQuestion: {question}"})
 
41
  response = openai.ChatCompletion.create(
42
- model="gpt-4o-mini",
43
  messages=messages,
44
  max_tokens=1200,
45
  temperature=0.7,
@@ -51,44 +57,44 @@ def is_irrelevant_question(question):
51
  irrelevant_keywords = ["life", "love", "meaning", "future", "philosophy"]
52
  return any(keyword in question.lower() for keyword in irrelevant_keywords)
53
 
54
- # Streamlit Chat UI
55
  def main():
56
  st.title("📄 GPT-4 Research Paper Chatbot")
57
- st.write("Ask any question related to the GPT-4 paper, and I'll try to answer it!")
58
-
 
59
  openai_api_key = st.text_input("🔑 Enter your OpenAI API Key:", type="password")
 
60
  if openai_api_key:
61
  openai.api_key = openai_api_key
62
  st.success("API Key successfully set!")
63
 
 
64
  pdf_file = st.file_uploader("📂 Upload GPT-4 Research Paper PDF", type="pdf")
 
65
  if pdf_file is not None:
66
- if "pdf_text" not in st.session_state:
67
- st.session_state.pdf_text = extract_pdf_text(pdf_file)
68
- st.session_state.chat_history = []
69
- st.session_state.conversation_active = True
70
-
71
  st.write("✅ PDF content loaded successfully! Start asking questions.")
72
- question = st.text_input("💬 Ask your question:")
73
-
74
- if st.button("🚪 End Conversation"):
75
- st.write("👋 Conversation ended. Refreshing...")
76
- st.session_state.conversation_active = False
77
- time.sleep(2)
78
- st.rerun()
79
-
80
- if question and st.session_state.conversation_active:
81
  if is_irrelevant_question(question):
82
- response = "Sorry, I can only answer questions related to GPT-4."
83
  else:
84
- relevant_context = get_relevant_context(st.session_state.pdf_text, question)
85
- response = generate_response(relevant_context, question, st.session_state.chat_history)
86
- st.session_state.chat_history.append({"role": "user", "content": question})
87
- st.session_state.chat_history.append({"role": "assistant", "content": response})
88
-
89
- st.write(f"🤖 **GPT-4:** {response}")
 
 
90
  else:
91
  st.warning("⚠️ Please enter your OpenAI API Key to use the chatbot.")
92
 
93
  if __name__ == "__main__":
94
- main()
 
4
  import numpy as np
5
  from sklearn.metrics.pairwise import cosine_similarity
6
  from io import BytesIO
 
7
 
8
  # Function to extract text from the uploaded PDF file
9
  def extract_pdf_text(pdf_file):
 
24
 
25
  # Function to get the most relevant context from the PDF for the query
26
  def get_relevant_context(pdf_text, query, num_contexts=3):
27
+ # Split the PDF text into chunks for better matching
28
  pdf_text_chunks = [pdf_text[i:i+1500] for i in range(0, len(pdf_text), 1500)]
29
+ # Get embeddings for both the document and the query
30
  pdf_embeddings = get_embeddings(pdf_text_chunks)
31
  query_embedding = get_embeddings([query])[0]
32
+
33
+ # Compute cosine similarity between query and document chunks
34
  similarities = cosine_similarity([query_embedding], pdf_embeddings)
35
  top_indices = similarities[0].argsort()[-num_contexts:][::-1]
36
+
37
+ # Combine the top context pieces
38
  relevant_context = " ".join([pdf_text_chunks[i] for i in top_indices])
39
  return relevant_context
40
 
41
  # Function to generate a response from GPT-4 chat model
42
+ def generate_response(context, question):
43
+ messages = [
44
+ {"role": "system", "content": "You are a helpful assistant expert on GPT-4."},
45
+ {"role": "user", "content": f"Context: {context}\nQuestion: {question}"}
46
+ ]
47
  response = openai.ChatCompletion.create(
48
+ model="gpt-4o-mini", # Use the GPT-4 chat model
49
  messages=messages,
50
  max_tokens=1200,
51
  temperature=0.7,
 
57
  irrelevant_keywords = ["life", "love", "meaning", "future", "philosophy"]
58
  return any(keyword in question.lower() for keyword in irrelevant_keywords)
59
 
60
+ # Streamlit UI
61
  def main():
62
  st.title("📄 GPT-4 Research Paper Chatbot")
63
+ st.write("💬 Ask any question related to the GPT-4 paper, and I'll try to answer it!")
64
+
65
+ # User input: OpenAI API key
66
  openai_api_key = st.text_input("🔑 Enter your OpenAI API Key:", type="password")
67
+
68
  if openai_api_key:
69
  openai.api_key = openai_api_key
70
  st.success("API Key successfully set!")
71
 
72
+ # Upload the PDF file
73
  pdf_file = st.file_uploader("📂 Upload GPT-4 Research Paper PDF", type="pdf")
74
+
75
  if pdf_file is not None:
76
+ # Extract text from the uploaded PDF
77
+ pdf_text = extract_pdf_text(pdf_file)
 
 
 
78
  st.write("✅ PDF content loaded successfully! Start asking questions.")
79
+
80
+ # User input: the question they want to ask
81
+ question = st.text_input("Ask your question:")
82
+
83
+ if question:
84
+ # Check if the question is irrelevant
 
 
 
85
  if is_irrelevant_question(question):
86
+ st.write("Sorry, I don't know the answer to this question. I am an expert on GPT-4 knowledge.")
87
  else:
88
+ # Get the most relevant context from the document
89
+ relevant_context = get_relevant_context(pdf_text, question)
90
+
91
+ # Generate the response from GPT-4 chat model
92
+ answer = generate_response(relevant_context, question)
93
+
94
+ # Display the answer
95
+ st.write(f"🤖 Answer: {answer}")
96
  else:
97
  st.warning("⚠️ Please enter your OpenAI API Key to use the chatbot.")
98
 
99
  if __name__ == "__main__":
100
+ main()