Neurolingua committed on
Commit
979aaae
1 Parent(s): 5dff670

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -157
app.py CHANGED
@@ -6,22 +6,23 @@ import requests
6
  from PIL import Image
7
  import shutil
8
 
9
- from langchain.vectorstores.chroma import Chroma
10
- from langchain.prompts import ChatPromptTemplate
11
- from langchain_community.llms.ollama import Ollama
12
- from get_embedding_function import get_embedding_function
13
- from langchain.document_loaders.pdf import PyPDFDirectoryLoader
14
- from langchain_text_splitters import RecursiveCharacterTextSplitter
15
- from langchain.schema.document import Document
16
- import tempfile
17
-
18
- # Create a temporary directory for Chroma if running in Hugging Face Spaces
19
-
20
 
 
 
21
 
22
  app = Flask(__name__)
23
  UPLOAD_FOLDER = '/code/uploads'
24
- CHROMA_PATH = tempfile.mkdtemp() # Use the same folder for Chroma
25
  if not os.path.exists(UPLOAD_FOLDER):
26
  os.makedirs(UPLOAD_FOLDER)
27
 
@@ -54,21 +55,6 @@ Answer the question based only on the following context:
54
  Answer the question based on the above context: {question}
55
  """
56
 
57
- from bs4 import BeautifulSoup
58
- import requests
59
- from requests.auth import HTTPBasicAuth
60
- from PIL import Image
61
- from io import BytesIO
62
- import pandas as pd
63
- from urllib.parse import urlparse
64
- import os
65
- from pypdf import PdfReader
66
- from ai71 import AI71
67
- import uuid
68
-
69
- from inference_sdk import InferenceHTTPClient
70
- import base64
71
-
72
  AI71_API_KEY = os.environ.get('AI71_API_KEY')
73
 
74
  def generate_response(query, chat_history):
@@ -192,56 +178,6 @@ def download_and_save_as_txt(url, account_sid, auth_token):
192
  except Exception as err:
193
  print(f"An error occurred: {err}")
194
 
195
-
196
- def initialize_chroma():
197
- try:
198
- # Initialize Chroma
199
- db = Chroma(persist_directory=CHROMA_PATH, embedding_function=get_embedding_function())
200
- # Perform an initial operation to ensure it works
201
- db.similarity_search_with_score("test query", k=1)
202
- print("Chroma initialized successfully.")
203
- except Exception as e:
204
- print(f"Error initializing Chroma: {e}")
205
-
206
- initialize_chroma()
207
-
208
- def query_rag(query_text: str):
209
- embedding_function = get_embedding_function()
210
- db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)
211
- print(query_text)
212
- # Check if the query is related to a PDF
213
- if "from pdf" in query_text.lower() or "in pdf" in query_text.lower():
214
- # Provide some context about handling PDFs
215
- response_text = "I see you're asking about a PDF-related query. Let me check the context from the PDF."
216
- else:
217
- # Regular RAG functionality
218
- response_text = "Your query is not related to PDFs. Please make sure your question is clear."
219
-
220
- results = db.similarity_search_with_score(query_text, k=5)
221
-
222
- if not results:
223
- response_text = "Sorry, I couldn't find any relevant information."
224
- else:
225
- context_text = "\n\n---\n\n".join([doc.page_content for doc, _score in results])
226
- prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
227
- prompt = prompt_template.format(context=context_text, question=query_text)
228
-
229
- response = ''
230
- for chunk in AI71(AI71_API_KEY).chat.completions.create(
231
- model="tiiuae/falcon-180b-chat",
232
- messages=[
233
- {"role": "system", "content": "You are the best agricultural assistant. Remember to give a response in not more than 2 sentences."},
234
- {"role": "user", "content": f'''Answer the following query based on the given context: {prompt}'''},
235
- ],
236
- stream=True,
237
- ):
238
- if chunk.choices[0].delta.content:
239
- response += chunk.choices[0].delta.content
240
-
241
- response_text = response.replace("###", '').replace('\nUser:', '')
242
-
243
- return response_text
244
-
245
  def download_file(url, extension):
246
  try:
247
  response = requests.get(url)
@@ -260,64 +196,6 @@ def download_file(url, extension):
260
  except Exception as err:
261
  print(f"An error occurred: {err}")
262
  return None
263
- def save_pdf_and_update_database(pdf_filepath):
264
- try:
265
- document_loader = PyPDFDirectoryLoader(UPLOAD_FOLDER)
266
- documents = document_loader.load()
267
-
268
- text_splitter = RecursiveCharacterTextSplitter(
269
- chunk_size=800,
270
- chunk_overlap=80,
271
- length_function=len,
272
- is_separator_regex=False,
273
- )
274
- chunks = text_splitter.split_documents(documents)
275
-
276
- add_to_chroma(chunks)
277
- print(f"PDF processed and data updated in Chroma.")
278
- except Exception as e:
279
- print(f"Error in processing PDF: {e}")
280
-
281
- def load_documents():
282
- document_loader = PyPDFDirectoryLoader(DATA_PATH)
283
- return document_loader.load()
284
-
285
- def add_to_chroma(chunks: list[Document]):
286
- try:
287
- db = Chroma(persist_directory=CHROMA_PATH, embedding_function=get_embedding_function())
288
- chunks_with_ids = calculate_chunk_ids(chunks)
289
- existing_items = db.get(include=[])
290
- existing_ids = set(existing_items["ids"])
291
-
292
- new_chunks = [chunk for chunk in chunks_with_ids if chunk.metadata["id"] not in existing_ids]
293
-
294
- if new_chunks:
295
- new_chunk_ids = [chunk.metadata["id"] for chunk in new_chunks]
296
- db.add_documents(new_chunks, ids=new_chunk_ids)
297
- db.persist()
298
- print(f"Chunks added to Chroma.")
299
- except Exception as e:
300
- print(f"Error adding chunks to Chroma: {e}")
301
- def calculate_chunk_ids(chunks):
302
- last_page_id = None
303
- current_chunk_index = 0
304
-
305
- for chunk in chunks:
306
- source = chunk.metadata.get("source")
307
- page = chunk.metadata.get("page")
308
- current_page_id = f"{source}:{page}"
309
-
310
- if current_page_id == last_page_id:
311
- current_chunk_index += 1
312
- else:
313
- current_chunk_index = 0
314
-
315
- last_page_id = current_page_id
316
- chunk_id = f"{current_page_id}:{current_chunk_index}"
317
- chunk.metadata["id"] = chunk_id
318
-
319
- return chunks
320
-
321
 
322
  @app.route('/whatsapp', methods=['POST'])
323
  def whatsapp_webhook():
@@ -352,7 +230,7 @@ def whatsapp_webhook():
352
  news = get_news()
353
  response_text = generate_response(incoming_msg + ' data is ' + str(news), chat_history)
354
  else:
355
- response_text = query_rag(incoming_msg)
356
 
357
  conversation_memory.add_to_memory({"user": incoming_msg, "assistant": response_text})
358
  send_message(sender, response_text)
@@ -370,34 +248,37 @@ def handle_image(filepath):
370
 
371
  if disease:
372
  response_text = f"Detected disease: {disease}"
373
- disease_info = generate_response(f"Provide brief information about {disease} in plants", chat_history)
374
- response_text += f"\n\nAdditional information: {disease_info}"
375
  elif pest:
376
  response_text = f"Detected pest: {pest}"
377
- pest_info = generate_response(f"Provide brief information about {pest} in agriculture", chat_history)
378
- response_text += f"\n\nAdditional information: {pest_info}"
379
  else:
380
- response_text = "Please upload another image with good quality."
381
-
382
  return response_text
383
 
384
  def process_and_query_pdf(filepath):
385
- # Assuming the PDF processing and embedding are handled here.
386
- add_to_chroma(load_documents())
387
- return query_rag("from pdf") # Replace with a more specific query if needed
388
-
 
 
 
 
 
 
 
 
389
 
390
- def send_message(to, body):
391
- try:
392
- message = client.messages.create(
393
- from_=from_whatsapp_number,
394
- body=body,
395
- to=to
396
- )
397
- print(f"Message sent with SID: {message.sid}")
398
- except Exception as e:
399
- print(f"Error sending message: {e}")
400
-
401
  def send_initial_message(to_number):
402
  send_message(
403
  f'whatsapp:{to_number}',
 
6
  from PIL import Image
7
  import shutil
8
 
9
+ from bs4 import BeautifulSoup
10
+ import requests
11
+ from requests.auth import HTTPBasicAuth
12
+ from PIL import Image
13
+ from io import BytesIO
14
+ import pandas as pd
15
+ from urllib.parse import urlparse
16
+ import os
17
+ from pypdf import PdfReader
18
+ from ai71 import AI71
19
+ import uuid
20
 
21
+ from inference_sdk import InferenceHTTPClient
22
+ import base64
23
 
24
  app = Flask(__name__)
25
  UPLOAD_FOLDER = '/code/uploads'
 
26
  if not os.path.exists(UPLOAD_FOLDER):
27
  os.makedirs(UPLOAD_FOLDER)
28
 
 
55
  Answer the question based on the above context: {question}
56
  """
57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  AI71_API_KEY = os.environ.get('AI71_API_KEY')
59
 
60
  def generate_response(query, chat_history):
 
178
  except Exception as err:
179
  print(f"An error occurred: {err}")
180
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
181
  def download_file(url, extension):
182
  try:
183
  response = requests.get(url)
 
196
  except Exception as err:
197
  print(f"An error occurred: {err}")
198
  return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
199
 
200
  @app.route('/whatsapp', methods=['POST'])
201
  def whatsapp_webhook():
 
230
  news = get_news()
231
  response_text = generate_response(incoming_msg + ' data is ' + str(news), chat_history)
232
  else:
233
+ response_text = generate_response(incoming_msg, chat_history)
234
 
235
  conversation_memory.add_to_memory({"user": incoming_msg, "assistant": response_text})
236
  send_message(sender, response_text)
 
248
 
249
  if disease:
250
  response_text = f"Detected disease: {disease}"
251
+ disease_info = generate_response(f"Provide brief information about {disease} in agriculture", "")
252
+ response_text += "\n" + disease_info
253
  elif pest:
254
  response_text = f"Detected pest: {pest}"
255
+ pest_info = generate_response(f"Provide brief information about {pest} in agriculture", "")
256
+ response_text += "\n" + pest_info
257
  else:
258
+ response_text = "Sorry, I couldn't detect any disease or pest. Please try another image."
259
+
260
  return response_text
261
 
262
  def process_and_query_pdf(filepath):
263
+ # Read and process the PDF
264
+ reader = PdfReader(filepath)
265
+ text = ''
266
+ for page in reader.pages:
267
+ text += page.extract_text()
268
+
269
+ if not text:
270
+ return "Sorry, the PDF content could not be extracted."
271
+
272
+ # Generate response based on extracted PDF content
273
+ response_text = generate_response(f"The PDF content is {text}", "")
274
+ return response_text
275
 
276
+ def send_message(recipient, message):
277
+ client.messages.create(
278
+ body=message,
279
+ from_=from_whatsapp_number,
280
+ to=recipient
281
+ )
 
 
 
 
 
282
  def send_initial_message(to_number):
283
  send_message(
284
  f'whatsapp:{to_number}',