shukdevdatta123 commited on
Commit
c0ece10
·
verified ·
1 Parent(s): 2f9b822

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +96 -0
app.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import re

import fitz  # PyMuPDF
import numpy as np
import openai
import streamlit as st
from sklearn.metrics.pairwise import cosine_similarity
6
+
7
+ # Function to extract text from the PDF file
8
# Function to extract text from the PDF file
def extract_pdf_text(pdf_file_path):
    """Extract the full plain text of a PDF, in page order.

    Args:
        pdf_file_path: A filesystem path/str, OR a binary file-like object
            (e.g. the Streamlit ``UploadedFile`` that ``main()`` passes in).

    Returns:
        str: The concatenated ``get_text("text")`` output of every page.
    """
    # BUG FIX: main() hands this function a Streamlit UploadedFile, not a
    # path. fitz.open() treats its positional argument as a filename, so the
    # original call failed for uploads; file-like inputs must be opened from
    # their bytes via the stream= keyword.
    if hasattr(pdf_file_path, "read"):
        doc = fitz.open(stream=pdf_file_path.read(), filetype="pdf")
    else:
        doc = fitz.open(pdf_file_path)
    try:
        # join is linear; repeated += on large documents would be quadratic.
        return "".join(page.get_text("text") for page in doc)
    finally:
        doc.close()  # release the underlying file handle / buffer
14
+
15
+ # Function to get embeddings for the text
16
# Function to get embeddings for the text
def get_embeddings(texts):
    """Embed each string in *texts* with OpenAI's ada-002 model.

    Args:
        texts: List of strings to embed (the API accepts a batch).

    Returns:
        list: One embedding vector (list of floats) per input string,
        in the same order as *texts*.
    """
    # Pre-1.0 openai SDK style, consistent with the rest of this file.
    api_result = openai.Embedding.create(
        model="text-embedding-ada-002",
        input=texts,
    )
    return [item["embedding"] for item in api_result["data"]]
23
+
24
+ # Function to get the most relevant context from the PDF for the query
25
# Function to get the most relevant context from the PDF for the query
def get_relevant_context(pdf_text, query, num_contexts=3):
    """Select the document chunks most semantically similar to *query*.

    The document is cut into fixed 1500-character chunks; the chunks and
    the query are embedded, and the chunks are ranked by cosine similarity
    to the query embedding.

    Args:
        pdf_text: Full text of the document.
        query: The user's question.
        num_contexts: How many top-ranked chunks to return (default 3).

    Returns:
        str: The selected chunks joined by single spaces, most similar first.
    """
    chunk_size = 1500
    chunks = [
        pdf_text[start:start + chunk_size]
        for start in range(0, len(pdf_text), chunk_size)
    ]

    # Embed the whole document in one batch, plus the query on its own.
    chunk_vectors = get_embeddings(chunks)
    query_vector = get_embeddings([query])[0]

    # Rank chunks by similarity; argsort ascending, so take the tail and
    # reverse it to get the best matches first.
    scores = cosine_similarity([query_vector], chunk_vectors)[0]
    best_indices = scores.argsort()[-num_contexts:][::-1]

    return " ".join(chunks[idx] for idx in best_indices)
39
+
40
+ # Function to generate a response from GPT-4o-mini model
41
# Function to generate a response from GPT-4o-mini model
def generate_response(context, question):
    """Answer *question* from *context* using the gpt-4o-mini model.

    Args:
        context: Relevant excerpt text pulled from the PDF.
        question: The user's question.

    Returns:
        str: The model's answer, stripped of surrounding whitespace.
    """
    prompt = f"The following is an excerpt from a research paper on GPT-4. Please answer the question based on this context:\n\nContext: {context}\n\nQuestion: {question}\nAnswer:"
    # BUG FIX: gpt-4o-mini is a chat model; the legacy text-completion
    # endpoint (openai.Completion.create) rejects chat models. Use the chat
    # endpoint from the same pre-1.0 SDK generation this file already uses
    # for Embedding.create, sending the prompt as a single user message.
    response = openai.ChatCompletion.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}],
        max_tokens=200,
        temperature=0.7,
    )
    return response.choices[0].message["content"].strip()
50
+
51
+ # Function to handle irrelevant questions
52
# Function to handle irrelevant questions
def is_irrelevant_question(question):
    """Heuristically detect off-topic (non-GPT-4) questions.

    A question is treated as irrelevant when it contains any whole word
    from a small list of philosophical/personal topics.

    Args:
        question: The user's question text.

    Returns:
        bool: True if the question looks off-topic, False otherwise.
    """
    irrelevant_keywords = {"life", "love", "meaning", "future", "philosophy"}
    # BUG FIX: the original used substring containment, which wrongly
    # flagged words like "lifecycle" (contains "life") or "glove"
    # (contains "love"). Tokenize and match whole words only.
    words = set(re.findall(r"[a-z]+", question.lower()))
    return not irrelevant_keywords.isdisjoint(words)
55
+
56
+ # Streamlit UI
57
# Streamlit UI
def main():
    """Render the Streamlit chat UI and drive the question/answer flow.

    Flow: collect the user's OpenAI API key, accept a PDF upload, take a
    question, short-circuit obviously off-topic questions, then retrieve
    the most relevant PDF context and generate an answer from it.
    """
    st.title("GPT-4 Research Paper Chatbot")
    st.write("Ask any question related to the GPT-4 paper, and I'll try to answer it!")

    # User input: OpenAI API key (masked in the UI)
    openai_api_key = st.text_input("Enter your OpenAI API Key:", type="password")

    if openai_api_key:
        # The key is set module-globally on openai; all later Embedding /
        # completion calls in this file pick it up implicitly.
        openai.api_key = openai_api_key
        st.success("API Key successfully set!")

        # Upload the PDF file
        pdf_file = st.file_uploader("Upload GPT-4 Research Paper PDF", type="pdf")

        if pdf_file is not None:
            # Extract text from the uploaded PDF.
            # NOTE(review): pdf_file is a Streamlit UploadedFile (a file-like
            # object), not a filesystem path — confirm extract_pdf_text
            # accepts file-like input.
            pdf_text = extract_pdf_text(pdf_file)
            st.write("PDF content loaded successfully!")

            # User input: the question they want to ask
            question = st.text_input("Ask your question:")

            if question:
                # Check if the question is irrelevant (keyword heuristic)
                if is_irrelevant_question(question):
                    st.write("Sorry, I don't know the answer to this question. I am an expert on GPT-4 knowledge.")
                else:
                    # Get the most relevant context from the document
                    relevant_context = get_relevant_context(pdf_text, question)

                    # Generate the response from GPT-4o-mini
                    answer = generate_response(relevant_context, question)

                    # Display the answer
                    st.write(f"Answer: {answer}")
    else:
        st.warning("Please enter your OpenAI API Key to use the chatbot.")


if __name__ == "__main__":
    main()