Neurolingua committed on
Commit
1d239e0
1 Parent(s): c36a14b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -358
app.py CHANGED
@@ -1,73 +1,39 @@
1
- from flask import Flask, request
2
- from twilio.twiml.messaging_response import MessagingResponse
3
- from twilio.rest import Client
4
  import os
5
- import requests
6
- from PIL import Image
7
- import shutil
8
 
 
 
 
9
  from langchain.vectorstores.chroma import Chroma
10
- from langchain.prompts import ChatPromptTemplate
11
- from langchain_community.llms.ollama import Ollama
12
- from get_embedding_function import get_embedding_function
13
- from langchain.document_loaders.pdf import PyPDFDirectoryLoader
14
- from langchain_text_splitters import RecursiveCharacterTextSplitter
15
- from langchain.schema.document import Document
16
- import tempfile
17
 
18
- # Create a temporary directory for Chroma if running in Hugging Face Spaces
19
-
20
-
21
-
22
- app = Flask(__name__)
23
- UPLOAD_FOLDER = '/code/uploads'
24
- CHROMA_PATH = tempfile.mkdtemp() # Use the same folder for Chroma
25
- if not os.path.exists(UPLOAD_FOLDER):
26
- os.makedirs(UPLOAD_FOLDER)
27
-
28
- app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
29
-
30
class ConversationBufferMemory:
    """Fixed-capacity FIFO buffer holding the most recent chat interactions."""

    def __init__(self, max_size=6):
        # Oldest interactions are evicted first once capacity is exceeded.
        self.max_size = max_size
        self.memory = []

    def add_to_memory(self, interaction):
        """Append *interaction*, dropping the oldest entry if over capacity."""
        self.memory.append(interaction)
        if len(self.memory) > self.max_size:
            del self.memory[0]

    def get_memory(self):
        """Return the buffered interactions, oldest first."""
        return self.memory

conversation_memory = ConversationBufferMemory(max_size=2)
44
-
45
- account_sid = os.environ.get('TWILIO_ACCOUNT_SID')
46
- auth_token = os.environ.get('TWILIO_AUTH_TOKEN')
47
- client = Client(account_sid, auth_token)
48
- from_whatsapp_number = 'whatsapp:+14155238886'
49
-
50
- PROMPT_TEMPLATE = """
51
- Answer the question based only on the following context:
52
- {context}
53
- ---
54
- Answer the question based on the above context: {question}
55
- """
56
-
57
- from bs4 import BeautifulSoup
58
- import requests
59
- from requests.auth import HTTPBasicAuth
60
- from PIL import Image
61
- from io import BytesIO
62
- import pandas as pd
63
- from urllib.parse import urlparse
64
- import os
65
- from pypdf import PdfReader
66
- from ai71 import AI71
67
- import uuid
68
-
69
- from inference_sdk import InferenceHTTPClient
70
- import base64
71
 
72
  AI71_API_KEY = os.environ.get('AI71_API_KEY')
73
 
@@ -76,7 +42,7 @@ def generate_response(query, chat_history):
76
  for chunk in AI71(AI71_API_KEY).chat.completions.create(
77
  model="tiiuae/falcon-180b-chat",
78
  messages=[
79
- {"role": "system", "content": "You are the best agricultural assistant. Remember to give a response in not more than 2 sentences. Greet the user if the user greets you."},
80
  {"role": "user", "content": f'''Answer the query based on history {chat_history}: {query}'''},
81
  ],
82
  stream=True,
@@ -85,238 +51,22 @@ def generate_response(query, chat_history):
85
  response += chunk.choices[0].delta.content
86
  return response.replace("###", '').replace('\nUser:', '')
87
 
88
def predict_pest(filepath):
    """Run the Roboflow pest-detection model on the image at *filepath*.

    Returns the first prediction dict from the detection response.
    NOTE(review): the API key is hard-coded — should be moved to an env var.
    """
    roboflow = InferenceHTTPClient(
        api_url="https://detect.roboflow.com",
        api_key="oF1aC4b1FBCDtK8CoKx7",
    )
    outcome = roboflow.infer(filepath, model_id="pest-detection-ueoco/1")
    return outcome['predictions'][0]
95
 
96
-
97
def predict_disease(filepath):
    """Run the Roboflow plant-disease classifier on the image at *filepath*.

    Returns the first predicted class label.
    NOTE(review): the API key is hard-coded — should be moved to an env var.
    """
    roboflow = InferenceHTTPClient(
        api_url="https://classify.roboflow.com",
        api_key="oF1aC4b1FBCDtK8CoKx7",
    )
    outcome = roboflow.infer(filepath, model_id="plant-disease-detection-iefbi/1")
    return outcome['predicted_classes'][0]
104
-
105
def convert_img(url, account_sid, auth_token):
    """Download a Twilio-hosted media image and re-encode it as RGB JPEG.

    Parameters: *url* is the Twilio media URL; *account_sid*/*auth_token*
    authenticate the download via HTTP basic auth.
    Returns the converted JPEG path, or None on any error.
    """
    try:
        response = requests.get(url, auth=HTTPBasicAuth(account_sid, auth_token))
        response.raise_for_status()

        parsed_url = urlparse(url)
        media_id = parsed_url.path.split('/')[-1]
        filename = f"downloaded_media_{media_id}"

        media_filepath = os.path.join(UPLOAD_FOLDER, filename)
        with open(media_filepath, 'wb') as file:
            file.write(response.content)

        print(f"Media downloaded successfully and saved as {media_filepath}")

        # BUG FIX: Image.open is lazy — force the pixel data to load while
        # the source file is still open, otherwise convert()/save() after
        # the `with` block can fail on a closed file handle.
        with open(media_filepath, 'rb') as img_file:
            image = Image.open(img_file)
            image.load()

        converted_filepath = os.path.join(UPLOAD_FOLDER, "image.jpg")
        image.convert('RGB').save(converted_filepath, 'JPEG')
        return converted_filepath

    except requests.exceptions.HTTPError as err:
        print(f"HTTP error occurred: {err}")
    except Exception as err:
        print(f"An error occurred: {err}")
    return None
132
-
133
def get_weather(city):
    """Scrape the current temperature for *city* from a Google search page.

    NOTE(review): depends on Google's 'BNeawe iBp4i AP7Wnd' CSS class,
    which can change without notice.
    """
    query = city.strip().replace(' ', '+')
    page = requests.get(f'https://www.google.com/search?q=weather+in+{query}')
    soup = BeautifulSoup(page.text, 'html.parser')
    return soup.find('div', attrs={'class': 'BNeawe iBp4i AP7Wnd'}).text
139
-
140
- from zenrows import ZenRowsClient
141
- Zenrow_api = os.environ.get('Zenrow_api')
142
- zenrows_client = ZenRowsClient(Zenrow_api)
143
-
144
def get_rates():
    """Scrape per-kg mandi commodity prices for Tamil Nadu.

    Returns a string rendering of {commodity: price} plus a unit note.
    BUG FIX: on a non-200 response the original fell through and returned
    None, which crashed the caller's string concatenation — now returns an
    explanatory message instead.
    """
    url = "https://www.kisandeals.com/mandiprices/ALL/TAMIL-NADU/ALL"
    response = zenrows_client.get(url)

    if response.status_code != 200:
        return "Market rates are currently unavailable."

    soup = BeautifulSoup(response.content, 'html.parser')
    data = {}
    for row in soup.select('table tbody tr'):
        columns = row.find_all('td')
        if len(columns) >= 2:
            commodity = columns[0].get_text(strip=True)
            price = columns[1].get_text(strip=True)
            # Keep only rows that actually show a rupee price.
            if '₹' in price:
                data[commodity] = price
    return str(data) + " These are the prices for 1 kg"
160
-
161
def get_news():
    """Scrape agriculture headlines from The Economic Times.

    Returns a list of headline strings (possibly empty).
    BUG FIX: a story block without an <h3> made `story.find('h3').text`
    raise AttributeError — such stories are now skipped.
    """
    news = []
    url = "https://economictimes.indiatimes.com/news/economy/agriculture?from=mdr"
    response = zenrows_client.get(url)

    if response.status_code == 200:
        soup = BeautifulSoup(response.content, 'html.parser')
        for story in soup.find_all("div", class_="eachStory"):
            h3 = story.find('h3')
            if h3 is not None:
                news.append(h3.text.strip())
    return news
173
-
174
def download_and_save_as_txt(url, account_sid, auth_token):
    """Download Twilio-hosted media (expected to be a PDF) into UPLOAD_FOLDER.

    Returns the saved file path, or None on any error.
    Cleanup: removed the unused parsed_url/media_id locals and the
    placeholder-free f-string filename; added an explicit `return None`
    on the error paths.
    """
    try:
        response = requests.get(url, auth=HTTPBasicAuth(account_sid, auth_token))
        response.raise_for_status()

        # Fixed name: each upload overwrites the previous PDF.
        txt_filepath = os.path.join(UPLOAD_FOLDER, "pdf_file.pdf")
        with open(txt_filepath, 'wb') as file:
            file.write(response.content)

        print(f"Media downloaded successfully and saved as {txt_filepath}")
        return txt_filepath

    except requests.exceptions.HTTPError as err:
        print(f"HTTP error occurred: {err}")
    except Exception as err:
        print(f"An error occurred: {err}")
    return None
194
-
195
-
196
def initialize_chroma():
    """Warm up the Chroma store with a throwaway similarity query so
    startup failures surface immediately instead of on the first request."""
    try:
        store = Chroma(persist_directory=CHROMA_PATH, embedding_function=get_embedding_function())
        store.similarity_search_with_score("test query", k=1)
        print("Chroma initialized successfully.")
    except Exception as e:
        print(f"Error initializing Chroma: {e}")

initialize_chroma()
207
-
208
def query_rag(query_text: str):
    """Answer *query_text* with retrieval-augmented generation over Chroma.

    Retrieves the top-5 matching chunks, formats them into PROMPT_TEMPLATE,
    and streams a completion from the Falcon model.
    Returns the cleaned model response, or an apology when nothing matches.
    Cleanup: the original pre-set response_text from a "from pdf" substring
    check, but both assignments were unconditionally overwritten below
    (dead stores) — that branch is removed.
    """
    embedding_function = get_embedding_function()
    db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)
    print(query_text)

    results = db.similarity_search_with_score(query_text, k=5)

    if not results:
        return "Sorry, I couldn't find any relevant information."

    context_text = "\n\n---\n\n".join([doc.page_content for doc, _score in results])
    prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
    prompt = prompt_template.format(context=context_text, question=query_text)

    response = ''
    # Stream chunks and accumulate the assistant's text.
    for chunk in AI71(AI71_API_KEY).chat.completions.create(
        model="tiiuae/falcon-180b-chat",
        messages=[
            {"role": "system", "content": "You are the best agricultural assistant. Remember to give a response in not more than 2 sentences."},
            {"role": "user", "content": f'''Answer the following query based on the given context: {prompt}'''},
        ],
        stream=True,
    ):
        if chunk.choices[0].delta.content:
            response += chunk.choices[0].delta.content

    return response.replace("###", '').replace('\nUser:', '')
244
 
245
def download_file(url, extension):
    """Download *url* into UPLOAD_FOLDER under a random UUID name ending
    in *extension*; return the saved path, or None on any error."""
    try:
        resp = requests.get(url)
        resp.raise_for_status()

        file_path = os.path.join(UPLOAD_FOLDER, f"{uuid.uuid4()}{extension}")
        with open(file_path, 'wb') as fh:
            fh.write(resp.content)

        print(f"File downloaded and saved as {file_path}")
        return file_path

    except requests.exceptions.HTTPError as err:
        print(f"HTTP error occurred: {err}")
    except Exception as err:
        print(f"An error occurred: {err}")
    return None
263
def save_pdf_and_update_database(pdf_filepath):
    """Re-index every PDF under UPLOAD_FOLDER into the Chroma store.

    NOTE(review): *pdf_filepath* is never used — the directory loader
    scans the whole upload folder rather than the single file; confirm
    whether that is intentional.
    """
    try:
        documents = PyPDFDirectoryLoader(UPLOAD_FOLDER).load()

        # ~800-char chunks with 80-char overlap for retrieval granularity.
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=800,
            chunk_overlap=80,
            length_function=len,
            is_separator_regex=False,
        )
        add_to_chroma(splitter.split_documents(documents))
        print(f"PDF processed and data updated in Chroma.")
    except Exception as e:
        print(f"Error in processing PDF: {e}")
280
-
281
def load_documents(data_path=UPLOAD_FOLDER):
    """Load every PDF in *data_path* (defaults to the upload folder).

    BUG FIX: the original referenced DATA_PATH, which is never defined in
    this module, so every call raised NameError. PDFs are downloaded into
    UPLOAD_FOLDER, so that is the correct default; the parameter keeps the
    zero-argument call sites working.
    """
    return PyPDFDirectoryLoader(data_path).load()
284
-
285
def add_to_chroma(chunks: list[Document]):
    """Insert only previously-unseen chunks (keyed by computed id) into Chroma."""
    try:
        db = Chroma(persist_directory=CHROMA_PATH, embedding_function=get_embedding_function())
        chunks_with_ids = calculate_chunk_ids(chunks)

        # Ids already stored, used to skip duplicates.
        existing_ids = set(db.get(include=[])["ids"])
        new_chunks = [c for c in chunks_with_ids if c.metadata["id"] not in existing_ids]

        if new_chunks:
            db.add_documents(new_chunks, ids=[c.metadata["id"] for c in new_chunks])
            db.persist()
            print(f"Chunks added to Chroma.")
    except Exception as e:
        print(f"Error adding chunks to Chroma: {e}")
301
def calculate_chunk_ids(chunks):
    """Assign each chunk a stable id "source:page:index" in metadata["id"].

    The index restarts at 0 whenever the source/page pair changes and
    increments for consecutive chunks on the same page. Mutates the
    chunks in place and returns the same list.
    """
    previous_page_id = None
    index_on_page = 0

    for chunk in chunks:
        page_id = f"{chunk.metadata.get('source')}:{chunk.metadata.get('page')}"
        index_on_page = index_on_page + 1 if page_id == previous_page_id else 0
        previous_page_id = page_id
        chunk.metadata["id"] = f"{page_id}:{index_on_page}"

    return chunks
320
 
321
 
322
  @app.route('/whatsapp', methods=['POST'])
@@ -331,78 +81,22 @@ def whatsapp_webhook():
331
  media_url = request.values.get('MediaUrl0')
332
  content_type = request.values.get('MediaContentType0')
333
 
334
- if content_type.startswith('image/'):
335
- # Handle image processing (disease/pest detection)
336
- filepath = convert_img(media_url, account_sid, auth_token)
337
- response_text = handle_image(filepath)
338
- else:
339
  # Handle PDF processing
340
- filepath = download_and_save_as_txt(media_url, account_sid, auth_token)
341
- response_text = process_and_query_pdf(filepath)
342
- elif ('weather' in incoming_msg.lower()) or ('climate' in incoming_msg.lower()) or (
343
- 'temperature' in incoming_msg.lower()):
344
- response_text = get_weather(incoming_msg.lower())
345
- elif 'bookkeeping' in incoming_msg:
346
- response_text = "Please provide the details you'd like to record."
347
- elif ('rates' in incoming_msg.lower()) or ('price' in incoming_msg.lower()) or (
348
- 'market' in incoming_msg.lower()) or ('rate' in incoming_msg.lower()) or ('prices' in incoming_msg.lower()):
349
- rates = get_rates()
350
- response_text = generate_response(incoming_msg + ' data is ' + rates, chat_history)
351
- elif ('news' in incoming_msg.lower()) or ('information' in incoming_msg.lower()):
352
- news = get_news()
353
- response_text = generate_response(incoming_msg + ' data is ' + str(news), chat_history)
354
  else:
355
- response_text = query_rag(incoming_msg)
 
356
 
357
  conversation_memory.add_to_memory({"user": incoming_msg, "assistant": response_text})
358
  send_message(sender, response_text)
359
  return '', 204
360
 
361
def handle_image(filepath):
    """Classify an uploaded image as plant disease and/or pest and build
    a reply enriched with model-generated background information.

    Returns the reply text; asks for a better image when neither model
    produced a result.
    BUG FIX: bare `except:` also swallowed SystemExit/KeyboardInterrupt —
    narrowed to `except Exception`.
    NOTE(review): `chat_history` is read as a global here but appears to
    be assigned inside the webhook handler — confirm it is in scope.
    """
    try:
        disease = predict_disease(filepath)
    except Exception:
        disease = None
    try:
        pest = predict_pest(filepath)
    except Exception:
        pest = None

    if disease:
        response_text = f"Detected disease: {disease}"
        disease_info = generate_response(f"Provide brief information about {disease} in plants", chat_history)
        response_text += f"\n\nAdditional information: {disease_info}"
    elif pest:
        response_text = f"Detected pest: {pest}"
        pest_info = generate_response(f"Provide brief information about {pest} in agriculture", chat_history)
        response_text += f"\n\nAdditional information: {pest_info}"
    else:
        response_text = "Please upload another image with good quality."

    return response_text
383
-
384
def process_and_query_pdf(filepath):
    """Index the freshly-downloaded PDFs into Chroma, then run a canned
    PDF query against the store."""
    add_to_chroma(load_documents())
    # Replace with a more specific query if needed.
    return query_rag("from pdf")
388
-
389
 
390
def send_message(to, body):
    """Send a WhatsApp message via the Twilio REST client.

    Logs the message SID on success and the error on failure; never raises.
    """
    try:
        sent = client.messages.create(
            from_=from_whatsapp_number,
            body=body,
            to=to,
        )
        print(f"Message sent with SID: {sent.sid}")
    except Exception as e:
        print(f"Error sending message: {e}")
400
-
401
def send_initial_message(to_number):
    """Greet *to_number* (digits only, no prefix) with the welcome text."""
    recipient = f'whatsapp:{to_number}'
    send_message(
        recipient,
        'Welcome to the Agri AI Chatbot! How can I assist you today? You can send an image with "pest" or "disease" to classify it.'
    )
406
  if __name__ == "__main__":
407
  send_initial_message('919080522395')
408
  send_initial_message('916382792828')
 
 
 
 
1
  import os
 
 
 
2
 
3
+ CHROMA_PATH = '/code/chroma_db'
4
+ if not os.path.exists(CHROMA_PATH):
5
+ os.makedirs(CHROMA_PATH)
6
  from langchain.vectorstores.chroma import Chroma
7
+ from langchain.document_loaders import PyPDFLoader
8
+ from langchain.embeddings import HuggingFaceEmbeddings
9
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
 
 
 
 
10
 
11
# Load and process the PDF
def save_pdf_and_update_database(pdf_filepath):
    """Load one PDF, split it into chunks, embed them, and persist the
    result into the Chroma store at CHROMA_PATH."""
    try:
        docs = PyPDFLoader(pdf_filepath).load()

        # ~800-char chunks with 80-char overlap keep retrieval granular.
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=800,
            chunk_overlap=80,
            length_function=len,
            is_separator_regex=False,
        )
        chunks = splitter.split_documents(docs)

        # Embed and store the chunks.
        store = Chroma(
            persist_directory=CHROMA_PATH,
            embedding_function=HuggingFaceEmbeddings(),
        )
        store.add_documents(chunks)
        store.persist()
        print("PDF processed and data updated in Chroma.")
    except Exception as e:
        print(f"Error processing PDF: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
  AI71_API_KEY = os.environ.get('AI71_API_KEY')
39
 
 
42
  for chunk in AI71(AI71_API_KEY).chat.completions.create(
43
  model="tiiuae/falcon-180b-chat",
44
  messages=[
45
+ {"role": "system", "content": "You are the best agricultural assistant. Remember to give a response in not more than 2 sentences."},
46
  {"role": "user", "content": f'''Answer the query based on history {chat_history}: {query}'''},
47
  ],
48
  stream=True,
 
51
  response += chunk.choices[0].delta.content
52
  return response.replace("###", '').replace('\nUser:', '')
53
 
54
+ def query_rag(query_text: str, chat_history):
55
+ db = Chroma(persist_directory=CHROMA_PATH, embedding_function=HuggingFaceEmbeddings())
 
 
 
 
 
56
 
57
+ # Perform a similarity search in ChromaDB
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  results = db.similarity_search_with_score(query_text, k=5)
59
 
60
  if not results:
61
+ return "Sorry, I couldn't find any relevant information."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
 
63
+ context_text = "\n\n---\n\n".join([doc.page_content for doc, _score in results])
64
+
65
+ # Generate the response using the Falcon model
66
+ prompt = f"Context:\n{context_text}\n\nQuestion:\n{query_text}"
67
+ response = generate_response(prompt, chat_history)
68
+
69
+ return response
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
 
71
 
72
  @app.route('/whatsapp', methods=['POST'])
 
81
  media_url = request.values.get('MediaUrl0')
82
  content_type = request.values.get('MediaContentType0')
83
 
84
+ if content_type == 'application/pdf':
 
 
 
 
85
  # Handle PDF processing
86
+ filepath = download_file(media_url, ".pdf")
87
+ save_pdf_and_update_database(filepath)
88
+ response_text = "PDF has been processed. You can now ask questions related to its content."
89
+ else:
90
+ response_text = "Unsupported file type. Please upload a PDF document."
 
 
 
 
 
 
 
 
 
91
  else:
92
+ # Handle queries
93
+ response_text = query_rag(incoming_msg, chat_history)
94
 
95
  conversation_memory.add_to_memory({"user": incoming_msg, "assistant": response_text})
96
  send_message(sender, response_text)
97
  return '', 204
98
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
  if __name__ == "__main__":
101
  send_initial_message('919080522395')
102
  send_initial_message('916382792828')