Neurolingua committed on
Update app.py

app.py CHANGED
@@ -16,12 +16,11 @@ from langchain.schema.document import Document
 
 app = Flask(__name__)
 UPLOAD_FOLDER = '/code/uploads'
-if not os.path.exists(UPLOAD_FOLDER):
-    os.makedirs(UPLOAD_FOLDER) # Creates an 'uploads' directory in the current working directory
-
 if not os.path.exists(UPLOAD_FOLDER):
     os.makedirs(UPLOAD_FOLDER)
+
 app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
+
 class ConversationBufferMemory:
     def __init__(self, max_size=6):
         self.memory = []
@@ -30,10 +29,11 @@ class ConversationBufferMemory:
     def add_to_memory(self, interaction):
         self.memory.append(interaction)
         if len(self.memory) > self.max_size:
-            self.memory.pop(0)
+            self.memory.pop(0)
 
     def get_memory(self):
         return self.memory
+
 conversation_memory = ConversationBufferMemory(max_size=2)
 
 account_sid = os.environ.get('TWILIO_ACCOUNT_SID')
@@ -45,15 +45,11 @@ CHROMA_PATH = "chroma"
 DATA_PATH = "data"
 PROMPT_TEMPLATE = """
 Answer the question based only on the following context:
-
 {context}
-
 ---
-
 Answer the question based on the above context: {question}
 """
 
-import os
 from bs4 import BeautifulSoup
 import requests
 from requests.auth import HTTPBasicAuth
@@ -64,30 +60,26 @@ from urllib.parse import urlparse
 import os
 from pypdf import PdfReader
 from ai71 import AI71
-import
-
-import pandas as pd
+import uuid
 
 from inference_sdk import InferenceHTTPClient
 import base64
 
-
-
 AI71_API_KEY = os.environ.get('AI71_API_KEY')
-
+
+def generate_response(query, chat_history):
     response = ''
     for chunk in AI71(AI71_API_KEY).chat.completions.create(
         model="tiiuae/falcon-180b-chat",
         messages=[
-            {"role": "system", "content": "You are the best agricultural assistant. Remember to give a response in not more than 2 sentences. Greet the user if the user greets you."},
-            {"role": "user",
-             "content": f'''Answer the query based on history {chat_history}:{query}'''},
+            {"role": "system", "content": "You are the best agricultural assistant. Remember to give a response in not more than 2 sentences. Greet the user if the user greets you."},
+            {"role": "user", "content": f'''Answer the query based on history {chat_history}: {query}'''},
         ],
         stream=True,
     ):
         if chunk.choices[0].delta.content:
             response += chunk.choices[0].delta.content
-    return response.replace("###", '').replace('\nUser:','')
+    return response.replace("###", '').replace('\nUser:', '')
 
 def predict_pest(filepath):
     CLIENT = InferenceHTTPClient(
@@ -108,27 +100,22 @@ def predict_disease(filepath):
 
 def convert_img(url, account_sid, auth_token):
     try:
-        # Make the request to the media URL with authentication
         response = requests.get(url, auth=HTTPBasicAuth(account_sid, auth_token))
-        response.raise_for_status()
+        response.raise_for_status()
 
-        # Determine a filename from the URL
         parsed_url = urlparse(url)
-        media_id = parsed_url.path.split('/')[-1]
+        media_id = parsed_url.path.split('/')[-1]
         filename = f"downloaded_media_{media_id}"
 
-        # Save the media content to a file
         media_filepath = os.path.join(UPLOAD_FOLDER, filename)
         with open(media_filepath, 'wb') as file:
             file.write(response.content)
 
         print(f"Media downloaded successfully and saved as {media_filepath}")
 
-        # Convert the saved media file to an image
         with open(media_filepath, 'rb') as img_file:
             image = Image.open(img_file)
 
-        # Optionally, convert the image to JPG and save in UPLOAD_FOLDER
         converted_filename = f"image.jpg"
         converted_filepath = os.path.join(UPLOAD_FOLDER, converted_filename)
         image.convert('RGB').save(converted_filepath, 'JPEG')
@@ -138,84 +125,57 @@ def convert_img(url, account_sid, auth_token):
         print(f"HTTP error occurred: {err}")
     except Exception as err:
         print(f"An error occurred: {err}")
-def get_weather(city):
-    city=city.strip()
-    city=city.replace(' ',"+")
-    r = requests.get(f'https://www.google.com/search?q=weather+in+{city}')
-
-    soup=BeautifulSoup(r.text,'html.parser')
-    temperature=soup.find('div',attrs={'class':'BNeawe iBp4i AP7Wnd'}).text
-
-    return (temperature)
 
+def get_weather(city):
+    city = city.strip().replace(' ', '+')
+    r = requests.get(f'https://www.google.com/search?q=weather+in+{city}')
+    soup = BeautifulSoup(r.text, 'html.parser')
+    temperature = soup.find('div', attrs={'class': 'BNeawe iBp4i AP7Wnd'}).text
+    return temperature
 
 from zenrows import ZenRowsClient
-
-
-# Initialize ZenRows client with your API key
-client = ZenRowsClient(str(Zenrow_api))
+Zenrow_api = os.environ.get('Zenrow_api')
+zenrows_client = ZenRowsClient(Zenrow_api)
 
-def get_rates():
+def get_rates():
     url = "https://www.kisandeals.com/mandiprices/ALL/TAMIL-NADU/ALL"
+    response = zenrows_client.get(url)
 
-    # Fetch the webpage content using ZenRows
-    response = client.get(url)
-
-    # Check if the request was successful
     if response.status_code == 200:
-        # Parse the raw HTML content with BeautifulSoup
         soup = BeautifulSoup(response.content, 'html.parser')
-
-        # Find the table rows containing the data
         rows = soup.select('table tbody tr')
         data = {}
        for row in rows:
-            # Extract commodity and price using BeautifulSoup
             columns = row.find_all('td')
             if len(columns) >= 2:
                 commodity = columns[0].get_text(strip=True)
                 price = columns[1].get_text(strip=True)
                 if '₹' in price:
                     data[commodity] = price
-    return str(data)+" These are the prices for 1 kg"
-
-
+    return str(data) + " These are the prices for 1 kg"
 
-
-
-news=[] # URL to scrape
+def get_news():
+    news = []
     url = "https://economictimes.indiatimes.com/news/economy/agriculture?from=mdr"
+    response = zenrows_client.get(url)
 
-    # Fetch the webpage content using ZenRows
-    response = client.get(url)
-
-    # Check if the request was successful
     if response.status_code == 200:
-        # Parse the raw HTML content with BeautifulSoup
         soup = BeautifulSoup(response.content, 'html.parser')
-
-        # Find the table rows containing the data
         headlines = soup.find_all("div", class_="eachStory")
         for story in headlines:
-            # Extract the headline
             headline = story.find('h3').text.strip()
             news.append(headline)
     return news
 
-
-
 def download_and_save_as_txt(url, account_sid, auth_token):
     try:
-        # Make the request to the media URL with authentication
         response = requests.get(url, auth=HTTPBasicAuth(account_sid, auth_token))
-        response.raise_for_status()
+        response.raise_for_status()
 
-        # Determine a filename from the URL
         parsed_url = urlparse(url)
-        media_id = parsed_url.path.split('/')[-1]
+        media_id = parsed_url.path.split('/')[-1]
         filename = f"pdf_file.pdf"
 
-        # Save the media content to a .txt file
         txt_filepath = os.path.join(UPLOAD_FOLDER, filename)
         with open(txt_filepath, 'wb') as file:
             file.write(response.content)
@@ -227,6 +187,7 @@ def download_and_save_as_txt(url, account_sid, auth_token):
         print(f"HTTP error occurred: {err}")
     except Exception as err:
         print(f"An error occurred: {err}")
+
 def query_rag(query_text: str):
     embedding_function = get_embedding_function()
     db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)
@@ -238,19 +199,15 @@ def query_rag(query_text: str):
     response_text = model.invoke(prompt)
     return response_text
 
-
 def save_pdf_and_update_database(media_url):
-    # Download the PDF file
     response = requests.get(media_url)
     pdf_filename = os.path.join(DATA_PATH, f"{uuid.uuid4()}.pdf")
     with open(pdf_filename, 'wb') as f:
         f.write(response.content)
 
-    # Use PyPDFDirectoryLoader if you want to process multiple PDFs in a directory
     document_loader = PyPDFDirectoryLoader(DATA_PATH)
     documents = document_loader.load()
 
-    # The rest of your code remains the same
     text_splitter = RecursiveCharacterTextSplitter(
         chunk_size=800,
         chunk_overlap=80,
@@ -261,11 +218,8 @@ def save_pdf_and_update_database(media_url):
 
     add_to_chroma(chunks)
 
-
 def add_to_chroma(chunks: list[Document]):
-    db = Chroma(
-        persist_directory=CHROMA_PATH, embedding_function=get_embedding_function()
-    )
+    db = Chroma(persist_directory=CHROMA_PATH, embedding_function=get_embedding_function())
     chunks_with_ids = calculate_chunk_ids(chunks)
     existing_items = db.get(include=[])
     existing_ids = set(existing_items["ids"])
@@ -277,7 +231,6 @@ def add_to_chroma(chunks: list[Document]):
     db.add_documents(new_chunks, ids=new_chunk_ids)
     db.persist()
 
-
 def calculate_chunk_ids(chunks):
     last_page_id = None
     current_chunk_index = 0
@@ -299,7 +252,6 @@ def calculate_chunk_ids(chunks):
 
     return chunks
 
-
 @app.route('/whatsapp', methods=['POST'])
 def whatsapp_webhook():
     incoming_msg = request.values.get('Body', '').lower()
@@ -334,7 +286,6 @@ def whatsapp_webhook():
         else:
             response_text = "Please upload another image with good quality."
     elif content_type == "application/pdf":
-        # Process the PDF and update the database
         save_pdf_and_update_database(media_url)
         response_text = "Your PDF has been saved and processed."
     else:
@@ -359,7 +310,6 @@ def whatsapp_webhook():
     send_message(sender, response_text)
     return '', 204
 
-
 def send_message(to, body):
     try:
         message = client.messages.create(
@@ -381,8 +331,6 @@ def send_initial_message(to_number):
         'Welcome to the Agri AI Chatbot! How can I assist you today?'
     )
 
-
 if __name__ == '__main__':
-    #send_initial_message('916382792828')
     send_initial_message('919080522395')
     app.run(host='0.0.0.0', port=7860)