Neurolingua committed on
Commit
558f5d1
1 Parent(s): bcf8e3e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +109 -95
app.py CHANGED
@@ -209,39 +209,58 @@ def query_rag(query_text: str):
209
  response += chunk.choices[0].delta.content
210
 
211
  return response.replace("###", '').replace('\nUser:', '')
 
 
 
 
 
 
 
 
 
 
 
 
212
 
213
- def save_pdf_and_update_database(media_url):
214
- response = requests.get(media_url)
215
- pdf_filename = os.path.join(UPLOAD_FOLDER, f"{uuid.uuid4()}.pdf")
216
- with open(pdf_filename, 'wb') as f:
217
- f.write(response.content)
218
-
219
- document_loader = PyPDFDirectoryLoader(UPLOAD_FOLDER)
220
- documents = document_loader.load()
221
-
222
- text_splitter = RecursiveCharacterTextSplitter(
223
- chunk_size=800,
224
- chunk_overlap=80,
225
- length_function=len,
226
- is_separator_regex=False,
227
- )
228
- chunks = text_splitter.split_documents(documents)
229
-
230
- add_to_chroma(chunks)
 
 
 
 
231
 
232
  def add_to_chroma(chunks: list[Document]):
233
- db = Chroma(persist_directory=CHROMA_PATH, embedding_function=get_embedding_function())
234
- chunks_with_ids = calculate_chunk_ids(chunks)
235
- existing_items = db.get(include=[])
236
- existing_ids = set(existing_items["ids"])
237
-
238
- new_chunks = [chunk for chunk in chunks_with_ids if chunk.metadata["id"] not in existing_ids]
239
-
240
- if new_chunks:
241
- new_chunk_ids = [chunk.metadata["id"] for chunk in new_chunks]
242
- db.add_documents(new_chunks, ids=new_chunk_ids)
243
- db.persist()
244
-
 
 
 
245
  def calculate_chunk_ids(chunks):
246
  last_page_id = None
247
  current_chunk_index = 0
@@ -262,73 +281,68 @@ def calculate_chunk_ids(chunks):
262
 
263
  return chunks
264
 
265
- @app.route("/pdf", methods=["POST"])
266
- def receive_pdf():
267
- media_url = request.values.get("MediaUrl", None)
268
- if media_url:
269
- save_pdf_and_update_database(media_url)
270
- return "PDF processed and saved successfully."
271
- return "No media URL found."
272
-
273
- @app.route("/whatsapp", methods=["POST"])
274
- def incoming_whatsapp():
275
- media_url = request.values.get("MediaUrl", None)
276
- from_number = request.values.get("From", "").strip()
277
- from_number = from_number[2:] if from_number.startswith("91") else from_number
278
  incoming_msg = request.values.get('Body', '').lower()
279
- response = MessagingResponse()
280
- message = response.message()
281
-
282
- if media_url:
283
- extension = os.path.splitext(media_url)[1]
284
- if extension.lower() == ".pdf":
285
- media_filepath = download_and_save_as_txt(media_url, account_sid, auth_token)
286
- save_pdf_and_update_database(media_url)
287
- message.body("The PDF was processed successfully.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
288
  else:
289
- message.body("Please send a PDF file.")
290
- return str(response)
291
-
292
- if 'get weather for' in incoming_msg:
293
- city = incoming_msg.replace("get weather for", "")
294
- temperature = get_weather(city)
295
- message.body(f'The temperature in {city} is {temperature}.')
296
- return str(response)
297
-
298
- if 'get rates' in incoming_msg:
299
- message.body(get_rates())
300
- return str(response)
301
-
302
- if 'get news' in incoming_msg:
303
- message.body(get_news())
304
- return str(response)
305
-
306
- if 'pest' in incoming_msg:
307
- text = predict_pest(media_filepath)
308
- message.body(text)
309
- return str(response)
310
-
311
- if 'disease' in incoming_msg:
312
- text = predict_disease(media_filepath)
313
- message.body(text)
314
- return str(response)
315
-
316
- if 'question:' in incoming_msg:
317
- conversation_memory.add_to_memory(f"User: {incoming_msg}")
318
- chat_history = "\n".join(conversation_memory.get_memory())
319
- response_text = generate_response(incoming_msg.replace("question:", ""), chat_history)
320
- conversation_memory.add_to_memory(f"Assistant: {response_text}")
321
- message.body(response_text)
322
- return str(response)
323
-
324
- if 'query:' in incoming_msg:
325
- query = incoming_msg.replace("query:", "").strip()
326
- response_text = query_rag(query)
327
- message.body(response_text)
328
- return str(response)
329
-
330
- message.body("I'm sorry, I don't understand that command.")
331
- return str(response)
332
 
333
  def send_message(to, body):
334
  try:
 
209
  response += chunk.choices[0].delta.content
210
 
211
  return response.replace("###", '').replace('\nUser:', '')
212
+ def download_file(url, extension):
213
+ try:
214
+ response = requests.get(url)
215
+ response.raise_for_status()
216
+ filename = f"{uuid.uuid4()}{extension}"
217
+ file_path = os.path.join(UPLOAD_FOLDER, filename)
218
+
219
+ with open(file_path, 'wb') as file:
220
+ file.write(response.content)
221
+
222
+ print(f"File downloaded and saved as {file_path}")
223
+ return file_path
224
 
225
+ except requests.exceptions.HTTPError as err:
226
+ print(f"HTTP error occurred: {err}")
227
+ except Exception as err:
228
+ print(f"An error occurred: {err}")
229
+ return None
230
+ def save_pdf_and_update_database(pdf_filepath):
231
+ try:
232
+ document_loader = PyPDFDirectoryLoader(UPLOAD_FOLDER)
233
+ documents = document_loader.load()
234
+
235
+ text_splitter = RecursiveCharacterTextSplitter(
236
+ chunk_size=800,
237
+ chunk_overlap=80,
238
+ length_function=len,
239
+ is_separator_regex=False,
240
+ )
241
+ chunks = text_splitter.split_documents(documents)
242
+
243
+ add_to_chroma(chunks)
244
+ print(f"PDF processed and data updated in Chroma.")
245
+ except Exception as e:
246
+ print(f"Error in processing PDF: {e}")
247
 
248
  def add_to_chroma(chunks: list[Document]):
249
+ try:
250
+ db = Chroma(persist_directory=CHROMA_PATH, embedding_function=get_embedding_function())
251
+ chunks_with_ids = calculate_chunk_ids(chunks)
252
+ existing_items = db.get(include=[])
253
+ existing_ids = set(existing_items["ids"])
254
+
255
+ new_chunks = [chunk for chunk in chunks_with_ids if chunk.metadata["id"] not in existing_ids]
256
+
257
+ if new_chunks:
258
+ new_chunk_ids = [chunk.metadata["id"] for chunk in new_chunks]
259
+ db.add_documents(new_chunks, ids=new_chunk_ids)
260
+ db.persist()
261
+ print(f"Chunks added to Chroma.")
262
+ except Exception as e:
263
+ print(f"Error adding chunks to Chroma: {e}")
264
  def calculate_chunk_ids(chunks):
265
  last_page_id = None
266
  current_chunk_index = 0
 
281
 
282
  return chunks
283
 
284
+
285
+ @app.route('/whatsapp', methods=['POST'])
286
+ def whatsapp_webhook():
 
 
 
 
 
 
 
 
 
 
287
  incoming_msg = request.values.get('Body', '').lower()
288
+ sender = request.values.get('From')
289
+ num_media = int(request.values.get('NumMedia', 0))
290
+
291
+ chat_history = conversation_memory.get_memory()
292
+
293
+ if num_media > 0:
294
+ media_url = request.values.get('MediaUrl0')
295
+ response_text = media_url
296
+ content_type = request.values.get('MediaContentType0')
297
+ if content_type.startswith('image/'):
298
+ filepath = convert_img(media_url, account_sid, auth_token)
299
+ try:
300
+ disease = predict_disease(filepath)
301
+ except:
302
+ disease = None
303
+ try:
304
+ pest = predict_pest(filepath)
305
+ except:
306
+ pest = None
307
+
308
+ if disease:
309
+ response_text = f"Detected disease: {disease}"
310
+ disease_info = generate_response(f"Provide brief information about {disease} in plants", chat_history)
311
+ response_text += f"\n\nAdditional information: {disease_info}"
312
+ elif pest:
313
+ response_text = f"Detected pest: {pest}"
314
+ pest_info = generate_response(f"Provide brief information about {pest} in agriculture", chat_history)
315
+ response_text += f"\n\nAdditional information: {pest_info}"
316
+ else:
317
+ response_text = "Please upload another image with good quality."
318
+
319
  else:
320
+ filepath = download_and_save_as_txt(media_url, account_sid, auth_token)
321
+ response_text = query_rag(filepath)
322
+
323
+ elif ('weather' in incoming_msg.lower()) or ('climate' in incoming_msg.lower()) or (
324
+ 'temperature' in incoming_msg.lower()):
325
+ response_text = get_weather(incoming_msg.lower())
326
+ elif 'bookkeeping' in incoming_msg:
327
+ response_text = "Please provide the details you'd like to record."
328
+ elif ('rates' in incoming_msg.lower()) or ('price' in incoming_msg.lower()) or (
329
+ 'market' in incoming_msg.lower()) or ('rate' in incoming_msg.lower()) or ('prices' in incoming_msg.lower()):
330
+ rates = get_rates()
331
+ response_text = generate_response(incoming_msg + ' data is ' + rates, chat_history)
332
+ elif ('news' in incoming_msg.lower()) or ('information' in incoming_msg.lower()):
333
+ news = get_news()
334
+ response_text = generate_response(incoming_msg + ' data is ' + str(news), chat_history)
335
+ else:
336
+ # Check if the query is related to a PDF document
337
+ if 'from pdf' in incoming_msg or 'in pdf' in incoming_msg:
338
+ response_text = query_rag(incoming_msg)
339
+ else:
340
+ response_text = generate_response(incoming_msg, chat_history)
341
+
342
+ conversation_memory.add_to_memory({"user": incoming_msg, "assistant": response_text})
343
+ send_message(sender, response_text)
344
+ return '', 204
345
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
346
 
347
  def send_message(to, body):
348
  try: