Neurolingua committed on
Commit
f4738b1
1 Parent(s): d3d3acb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +94 -8
app.py CHANGED
@@ -7,6 +7,7 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
7
  import requests
8
  from twilio.rest import Client
9
 
 
10
  # Flask app
11
  app = Flask(__name__)
12
 
@@ -15,6 +16,35 @@ CHROMA_PATH = '/code/chroma_db'
15
  if not os.path.exists(CHROMA_PATH):
16
  os.makedirs(CHROMA_PATH)
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  # Initialize ChromaDB
19
  def initialize_chroma():
20
  try:
@@ -46,15 +76,67 @@ def download_file(url, ext):
46
  return local_filename
47
 
48
  # Process PDF and return text
 
 
 
 
49
  def extract_text_from_pdf(pdf_filepath):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  try:
51
- document_loader = PyPDFLoader(pdf_filepath)
52
- documents = document_loader.load()
53
- text = "\n\n".join([doc.page_content for doc in documents])
54
- return text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  except Exception as e:
56
  print(f"Error processing PDF: {e}")
57
- return "Error extracting text from PDF."
58
 
59
  # Flask route to handle WhatsApp webhook
60
  @app.route('/whatsapp', methods=['POST'])
@@ -63,19 +145,23 @@ def whatsapp_webhook():
63
  sender = request.values.get('From')
64
  num_media = int(request.values.get('NumMedia', 0))
65
 
 
 
66
  if num_media > 0:
67
  media_url = request.values.get('MediaUrl0')
68
  content_type = request.values.get('MediaContentType0')
69
 
70
  if content_type == 'application/pdf':
71
  filepath = download_file(media_url, ".pdf")
72
- extracted_text = extract_text_from_pdf(filepath)
73
- response_text = f"Here is the content of the PDF:\n\n{extracted_text}"
74
  else:
75
  response_text = "Unsupported file type. Please upload a PDF document."
76
  else:
77
- response_text = "Please upload a PDF document."
 
78
 
 
79
  send_message(sender, response_text)
80
  return '', 204
81
 
 
7
  import requests
8
  from twilio.rest import Client
9
 
10
+
11
  # Flask app
12
  app = Flask(__name__)
13
 
 
16
  if not os.path.exists(CHROMA_PATH):
17
  os.makedirs(CHROMA_PATH)
18
 
19
+ from ai71 import AI71
20
+
21
def generate_response(query, chat_history):
    """Answer ``query`` with the Falcon chat model served through AI71.

    The request is streamed; the streamed text fragments are concatenated
    and the markers ``###`` and ``\\nUser:`` are stripped from the result.

    Args:
        query: The question (or full prompt) to send to the model.
        chat_history: Prior conversation; it is interpolated verbatim into
            the user message, so any object with a string form works.

    Returns:
        The cleaned model reply, or a fixed error sentence when anything in
        the request or streaming fails.
    """
    # NOTE(review): AI71_API_KEY is not defined in the visible part of the
    # file -- presumably a module-level constant; confirm it exists.
    try:
        conversation = [
            {"role": "system", "content": "You are the best agricultural assistant. Remember to give a response in not more than 2 sentences."},
            {"role": "user", "content": f"Answer the query based on history {chat_history}: {query}"},
        ]
        stream = AI71(api_key=AI71_API_KEY).chat.completions.create(
            model="tiiuae/falcon-180b-chat",
            messages=conversation,
            stream=True,
        )

        # Collect only the chunks that actually carry text.
        pieces = []
        for chunk in stream:
            fragment = chunk.choices[0].delta.content
            if fragment:
                pieces.append(fragment)

        # Strip formatting artifacts from the reply text.
        return "".join(pieces).replace("###", '').replace('\nUser:', '')
    except Exception as e:
        print(f"Error generating response: {e}")
        return "An error occurred while generating the response."
46
+
47
+
48
  # Initialize ChromaDB
49
  def initialize_chroma():
50
  try:
 
76
  return local_filename
77
 
78
  # Process PDF and return text
79
+
80
+
81
import fitz  # PyMuPDF


def extract_text_from_pdf(pdf_filepath):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_filepath: Path of the PDF file to read.

    Returns:
        The combined page text (an empty string for a document with no
        pages), or ``None`` if the file cannot be opened or read.
    """
    try:
        # The context manager closes the document even when reading a page
        # raises; previously close() was only reached on the success path.
        with fitz.open(pdf_filepath) as pdf_document:
            return "".join(page.get_text() for page in pdf_document)
    except Exception as e:
        # Best-effort: callers treat a falsy result as "extraction failed".
        print(f"Error extracting text from PDF: {e}")
        return None
95
+
96
def query_rag(query_text: str, chat_history):
    """Answer a question from the documents stored in the Chroma database.

    Looks up the five stored chunks most similar to ``query_text``, joins
    them into a context section, and passes the resulting prompt together
    with ``chat_history`` to ``generate_response``.

    Args:
        query_text: The user's question.
        chat_history: Conversation history forwarded to ``generate_response``.

    Returns:
        The generated answer, a fixed apology when the search returns no
        results, or a fixed error sentence when any step raises.
    """
    try:
        store = Chroma(
            persist_directory=CHROMA_PATH,
            embedding_function=HuggingFaceEmbeddings(),
        )
        matches = store.similarity_search_with_score(query_text, k=5)

        if matches:
            # Scores are ignored; only the chunk text goes into the prompt.
            passages = (document.page_content for document, _ in matches)
            context_text = "\n\n---\n\n".join(passages)
            return generate_response(
                f"Context:\n{context_text}\n\nQuestion:\n{query_text}",
                chat_history,
            )

        return "Sorry, I couldn't find any relevant information."
    except Exception as e:
        print(f"Error querying RAG system: {e}")
        return "An error occurred while querying the RAG system."
115
+
116
+
117
def save_pdf_and_update_database(pdf_filepath):
    """Extract a PDF's text, split it into chunks, and store them in Chroma.

    Args:
        pdf_filepath: Path of the PDF file to index.

    Returns:
        None. Progress and failures are reported with ``print``; no
        exception escapes this function.
    """
    try:
        text = extract_text_from_pdf(pdf_filepath)
        if not text:
            print("Error extracting text from PDF.")
            return

        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=800,
            chunk_overlap=80,
            length_function=len,
            is_separator_regex=False,
        )
        chunks = text_splitter.split_text(text)
        if not chunks:
            # Nothing to index (e.g. the text was only whitespace); treat it
            # like a failed extraction instead of sending an empty batch.
            print("Error extracting text from PDF.")
            return

        embedding_function = HuggingFaceEmbeddings()
        db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)

        # split_text() yields plain strings, so they must go through
        # add_texts(); add_documents() expects Document objects and would
        # fail on the first chunk, leaving the database unchanged.
        db.add_texts(chunks)
        db.persist()
        print("PDF processed and data updated in Chroma.")
    except Exception as e:
        print(f"Error processing PDF: {e}")
 
140
 
141
  # Flask route to handle WhatsApp webhook
142
  @app.route('/whatsapp', methods=['POST'])
 
145
  sender = request.values.get('From')
146
  num_media = int(request.values.get('NumMedia', 0))
147
 
148
+ chat_history = [] # You need to handle chat history appropriately
149
+
150
  if num_media > 0:
151
  media_url = request.values.get('MediaUrl0')
152
  content_type = request.values.get('MediaContentType0')
153
 
154
  if content_type == 'application/pdf':
155
  filepath = download_file(media_url, ".pdf")
156
+ save_pdf_and_update_database(filepath)
157
+ response_text = "PDF has been processed. You can now ask questions related to its content."
158
  else:
159
  response_text = "Unsupported file type. Please upload a PDF document."
160
  else:
161
+ # Use RAG to generate a response based on the query
162
+ response_text = query_rag(incoming_msg, chat_history)
163
 
164
+ # Send the response back to the sender
165
  send_message(sender, response_text)
166
  return '', 204
167