|
from flask import Flask, request |
|
from twilio.twiml.messaging_response import MessagingResponse |
|
from twilio.rest import Client |
|
import os |
|
import requests |
|
from PIL import Image |
|
import shutil |
|
|
|
from langchain.vectorstores.chroma import Chroma |
|
from langchain.prompts import ChatPromptTemplate |
|
from langchain_community.llms.ollama import Ollama |
|
from get_embedding_function import get_embedding_function |
|
from langchain.document_loaders.pdf import PyPDFDirectoryLoader |
|
from langchain_text_splitters import RecursiveCharacterTextSplitter |
|
from langchain.schema.document import Document |
|
import tempfile |
|
|
|
|
|
|
|
|
|
|
|
# Flask application that exposes the WhatsApp webhook defined further down.
app = Flask(__name__)

# Directory where downloaded media (images, PDFs) is written.
UPLOAD_FOLDER = '/code/uploads'

# A fresh temporary directory is created on every import and used as the
# Chroma persist directory, so the vector store does not outlive this path.
CHROMA_PATH = tempfile.mkdtemp()

# exist_ok=True replaces the exists()/makedirs() pair, which could race when
# two processes start at the same time.
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
|
|
|
class ConversationBufferMemory:
    """Bounded list of the most recent interactions.

    At most ``max_size`` entries are kept; adding one more drops the oldest.
    """

    def __init__(self, max_size=6):
        self.max_size = max_size
        self.memory = []

    def add_to_memory(self, interaction):
        """Append ``interaction`` and evict the oldest entry when over capacity."""
        history = self.memory
        history.append(interaction)
        if len(history) <= self.max_size:
            return
        del history[0]

    def get_memory(self):
        """Return the internal list of stored interactions (not a copy)."""
        return self.memory
|
|
|
# Twilio credentials are read from the environment; either may be None when
# the variable is not set.
account_sid = os.getenv('TWILIO_ACCOUNT_SID')
auth_token = os.getenv('TWILIO_AUTH_TOKEN')

# REST client used for outbound WhatsApp messages.
client = Client(account_sid, auth_token)

# Sender address used for every outbound message.
from_whatsapp_number = 'whatsapp:+14155238886'

# Single module-level buffer holding the last two interactions.
# NOTE(review): it is shared by every sender that hits the webhook.
conversation_memory = ConversationBufferMemory(max_size=2)
|
|
|
# Prompt skeleton for retrieval-augmented answers; ``{context}`` and
# ``{question}`` are filled in by query_rag.
PROMPT_TEMPLATE = (
    "\n"
    "Answer the question based only on the following context:\n"
    "{context}\n"
    "---\n"
    "Answer the question based on the above context: {question}\n"
)
|
|
|
from bs4 import BeautifulSoup |
|
import requests |
|
from requests.auth import HTTPBasicAuth |
|
from PIL import Image |
|
from io import BytesIO |
|
import pandas as pd |
|
from urllib.parse import urlparse |
|
import os |
|
from pypdf import PdfReader |
|
from ai71 import AI71 |
|
import uuid |
|
|
|
from inference_sdk import InferenceHTTPClient |
|
import base64 |
|
|
|
# API key for the AI71 chat-completions client; None when the variable is unset.
AI71_API_KEY = os.getenv('AI71_API_KEY')
|
|
|
def generate_response(query, chat_history):
    """Ask the falcon-180b chat model to answer ``query`` given ``chat_history``.

    The completion is streamed; the non-empty content deltas are concatenated
    and the literal markers "###" and "\\nUser:" are removed from the result.
    """
    messages = [
        {"role": "system", "content": "You are the best agricultural assistant. Remember to give a response in not more than 2 sentences. Greet the user if the user greets you."},
        {"role": "user", "content": f'''Answer the query based on history {chat_history}: {query}'''},
    ]
    stream = AI71(AI71_API_KEY).chat.completions.create(
        model="tiiuae/falcon-180b-chat",
        messages=messages,
        stream=True,
    )

    pieces = []
    for chunk in stream:
        text = chunk.choices[0].delta.content
        if text:
            pieces.append(text)

    answer = "".join(pieces)
    return answer.replace("###", '').replace('\nUser:', '')
|
|
|
def predict_pest(filepath):
    """Run the Roboflow pest-detection model on the image at ``filepath``.

    Returns the first entry of the response's ``predictions`` list.

    Raises:
        IndexError: when the model returns no predictions.
    """
    # SECURITY: the key used to be hard-coded only. It can now be supplied via
    # ROBOFLOW_API_KEY like the other credentials in this file; the literal
    # stays as a fallback for backward compatibility and should be rotated.
    api_key = os.environ.get('ROBOFLOW_API_KEY', "oF1aC4b1FBCDtK8CoKx7")
    CLIENT = InferenceHTTPClient(
        api_url="https://detect.roboflow.com",
        api_key=api_key,
    )
    result = CLIENT.infer(filepath, model_id="pest-detection-ueoco/1")
    return result['predictions'][0]
|
|
|
|
|
def predict_disease(filepath):
    """Run the Roboflow plant-disease classifier on the image at ``filepath``.

    Returns the first entry of the response's ``predicted_classes`` list.

    Raises:
        IndexError: when the model returns no predicted classes.
    """
    # SECURITY: the key used to be hard-coded only. It can now be supplied via
    # ROBOFLOW_API_KEY like the other credentials in this file; the literal
    # stays as a fallback for backward compatibility and should be rotated.
    api_key = os.environ.get('ROBOFLOW_API_KEY', "oF1aC4b1FBCDtK8CoKx7")
    CLIENT = InferenceHTTPClient(
        api_url="https://classify.roboflow.com",
        api_key=api_key,
    )
    result = CLIENT.infer(filepath, model_id="plant-disease-detection-iefbi/1")
    return result['predicted_classes'][0]
|
|
|
def convert_img(url, account_sid, auth_token):
    """Download a Twilio media image and re-encode it as a JPEG.

    Args:
        url: Media URL to fetch.
        account_sid: Username for HTTP basic auth on the download.
        auth_token: Password for HTTP basic auth on the download.

    Returns:
        Path of the converted ``image.jpg`` inside UPLOAD_FOLDER, or None when
        the download or the conversion fails (the error is printed).
    """
    try:
        # A timeout keeps a stalled media download from hanging the webhook.
        response = requests.get(
            url,
            auth=HTTPBasicAuth(account_sid, auth_token),
            timeout=30,
        )
        response.raise_for_status()

        # The last path segment of the media URL names the raw download.
        media_id = urlparse(url).path.split('/')[-1]
        media_filepath = os.path.join(UPLOAD_FOLDER, f"downloaded_media_{media_id}")
        with open(media_filepath, 'wb') as file:
            file.write(response.content)

        print(f"Media downloaded successfully and saved as {media_filepath}")

        converted_filepath = os.path.join(UPLOAD_FOLDER, "image.jpg")
        # Pillow reads pixel data lazily, so the conversion must happen while
        # the image file is still open.
        with Image.open(media_filepath) as image:
            image.convert('RGB').save(converted_filepath, 'JPEG')
        return converted_filepath

    except requests.exceptions.HTTPError as err:
        print(f"HTTP error occurred: {err}")
    except Exception as err:
        print(f"An error occurred: {err}")
    return None
|
|
|
def get_weather(city):
    """Scrape the temperature Google shows for "weather in <city>".

    Args:
        city: Free-text location; surrounding whitespace is ignored.

    Returns:
        Text of the temperature element, or an apology message when that
        element is not present in the returned page.
    """
    # Passing the query through ``params`` URL-encodes it (spaces still become
    # '+'), so characters such as '&' or '#' in user text cannot alter the URL.
    r = requests.get(
        'https://www.google.com/search',
        params={'q': f'weather in {city.strip()}'},
        timeout=10,
    )
    soup = BeautifulSoup(r.text, 'html.parser')
    temperature_node = soup.find('div', attrs={'class': 'BNeawe iBp4i AP7Wnd'})
    if temperature_node is None:
        # The markup is not guaranteed; avoid AttributeError on a missing node.
        return "Sorry, I could not find the weather for that location."
    return temperature_node.text
|
|
|
from zenrows import ZenRowsClient |
|
# ZenRows API key from the environment (None when unset).
Zenrow_api = os.getenv('Zenrow_api')

# Shared ZenRows client used by the scraping helpers below.
zenrows_client = ZenRowsClient(Zenrow_api)
|
|
|
def get_rates():
    """Scrape commodity prices for Tamil Nadu from kisandeals.com.

    Returns:
        The string form of a ``{commodity: price}`` dict followed by
        " These are the prices for 1 kg", or an "unavailable" message when the
        page cannot be fetched. The result is always a string so callers can
        concatenate it safely.
    """
    url = "https://www.kisandeals.com/mandiprices/ALL/TAMIL-NADU/ALL"
    response = zenrows_client.get(url)

    if response.status_code != 200:
        # Previously this path had no usable return value for the caller.
        print(f"Failed to fetch market rates: HTTP {response.status_code}")
        return "Market rates are currently unavailable."

    soup = BeautifulSoup(response.content, 'html.parser')
    data = {}
    for row in soup.select('table tbody tr'):
        columns = row.find_all('td')
        if len(columns) < 2:
            continue
        commodity = columns[0].get_text(strip=True)
        price = columns[1].get_text(strip=True)
        # Only keep rows whose second cell contains a rupee sign.
        if '₹' in price:
            data[commodity] = price

    return str(data) + " These are the prices for 1 kg"
|
|
|
def get_news():
    """Scrape agriculture headlines from the Economic Times.

    Returns:
        A list of headline strings; empty when the page cannot be fetched or
        contains no matching stories.
    """
    url = "https://economictimes.indiatimes.com/news/economy/agriculture?from=mdr"
    response = zenrows_client.get(url)

    if response.status_code != 200:
        return []

    soup = BeautifulSoup(response.content, 'html.parser')
    news = []
    for story in soup.find_all("div", class_="eachStory"):
        heading = story.find('h3')
        # Skip story blocks without an <h3> instead of raising AttributeError.
        if heading is not None:
            news.append(heading.text.strip())
    return news
|
|
|
def download_and_save_as_txt(url, account_sid, auth_token):
    """Download a Twilio media file and store it as ``pdf_file.pdf``.

    Despite the name, the bytes are written unchanged to a fixed ``.pdf``
    filename inside UPLOAD_FOLDER.

    Args:
        url: Media URL to fetch.
        account_sid: Username for HTTP basic auth on the download.
        auth_token: Password for HTTP basic auth on the download.

    Returns:
        Path of the saved file, or None when the download fails (the error is
        printed).
    """
    try:
        # A timeout keeps a stalled media download from hanging the webhook.
        response = requests.get(
            url,
            auth=HTTPBasicAuth(account_sid, auth_token),
            timeout=30,
        )
        response.raise_for_status()

        txt_filepath = os.path.join(UPLOAD_FOLDER, "pdf_file.pdf")
        with open(txt_filepath, 'wb') as file:
            file.write(response.content)

        print(f"Media downloaded successfully and saved as {txt_filepath}")
        return txt_filepath

    except requests.exceptions.HTTPError as err:
        print(f"HTTP error occurred: {err}")
    except Exception as err:
        print(f"An error occurred: {err}")
    return None
|
|
|
|
|
def initialize_chroma():
    """Open the Chroma store at CHROMA_PATH and run one probe query.

    Any failure is printed rather than raised.
    """
    try:
        store = Chroma(
            persist_directory=CHROMA_PATH,
            embedding_function=get_embedding_function(),
        )
        # Probe search; the result is discarded.
        store.similarity_search_with_score("test query", k=1)
        print("Chroma initialized successfully.")
    except Exception as e:
        print(f"Error initializing Chroma: {e}")


# Run the probe once at import time.
initialize_chroma()
|
|
|
def query_rag(query_text: str):
    """Answer ``query_text`` from the five most similar chunks in Chroma.

    The retrieved chunk texts are joined into the PROMPT_TEMPLATE context and
    sent to the falcon-180b chat model. Any exception (including a non-string
    query) is printed and turned into a fixed apology string.
    """
    try:
        if not isinstance(query_text, str):
            raise ValueError("Query text must be a string.")

        store = Chroma(
            persist_directory=CHROMA_PATH,
            embedding_function=get_embedding_function(),
        )
        matches = store.similarity_search_with_score(query_text, k=5)

        passages = []
        for document, _score in matches:
            passages.append(document.page_content)
        if any(not isinstance(passage, str) for passage in passages):
            raise ValueError("All context texts must be strings.")

        prompt = ChatPromptTemplate.from_template(PROMPT_TEMPLATE).format(
            context="\n\n---\n\n".join(passages),
            question=query_text,
        )

        stream = AI71(AI71_API_KEY).chat.completions.create(
            model="tiiuae/falcon-180b-chat",
            messages=[
                {"role": "system", "content": "You are the best agricultural assistant. Remember to give a response in not more than 2 sentences."},
                {"role": "user", "content": f'Answer the following query based on the given context: {prompt}'},
            ],
            stream=True,
        )

        pieces = []
        for chunk in stream:
            text = chunk.choices[0].delta.content
            if text:
                pieces.append(text)

        return "".join(pieces).replace("###", '').replace('\nUser:', '')

    except Exception as e:
        print(f"Error in query_rag: {e}")
        return "Sorry, there was an error processing your query."
|
|
|
def download_file(url, extension):
    """Download ``url`` into UPLOAD_FOLDER under a random UUID filename.

    Args:
        url: Location to fetch (no authentication is sent).
        extension: Suffix appended verbatim to the generated name, e.g. ".pdf".

    Returns:
        Path of the saved file, or None when the download fails (the error is
        printed).
    """
    try:
        # A timeout keeps a stalled download from blocking the caller forever.
        response = requests.get(url, timeout=30)
        response.raise_for_status()

        file_path = os.path.join(UPLOAD_FOLDER, f"{uuid.uuid4()}{extension}")
        with open(file_path, 'wb') as file:
            file.write(response.content)

        print(f"File downloaded and saved as {file_path}")
        return file_path

    except requests.exceptions.HTTPError as err:
        print(f"HTTP error occurred: {err}")
    except Exception as err:
        print(f"An error occurred: {err}")
    return None
|
def save_pdf_and_update_database(pdf_filepath):
    """Split every PDF in UPLOAD_FOLDER into chunks and add them to Chroma.

    NOTE(review): ``pdf_filepath`` is not used; the whole upload directory is
    loaded each time. Errors are printed rather than raised.
    """
    try:
        pages = PyPDFDirectoryLoader(UPLOAD_FOLDER).load()

        splitter = RecursiveCharacterTextSplitter(
            chunk_size=800,
            chunk_overlap=80,
            length_function=len,
            is_separator_regex=False,
        )
        add_to_chroma(splitter.split_documents(pages))

        print("PDF processed and data updated in Chroma.")
    except Exception as e:
        print(f"Error in processing PDF: {e}")
|
|
|
def add_to_chroma(chunks: list[Document]):
    """Add to Chroma the chunks whose computed id is not already stored.

    Ids are assigned by calculate_chunk_ids. Errors are printed rather than
    raised.
    """
    try:
        store = Chroma(
            persist_directory=CHROMA_PATH,
            embedding_function=get_embedding_function(),
        )
        identified = calculate_chunk_ids(chunks)

        # include=[] asks only for ids, which is all the dedup check needs.
        known_ids = set(store.get(include=[])["ids"])

        fresh = []
        for chunk in identified:
            if chunk.metadata["id"] not in known_ids:
                fresh.append(chunk)

        if fresh:
            fresh_ids = [chunk.metadata["id"] for chunk in fresh]
            store.add_documents(fresh, ids=fresh_ids)
            store.persist()
            print("Chunks added to Chroma.")
    except Exception as e:
        print(f"Error adding chunks to Chroma: {e}")
|
def calculate_chunk_ids(chunks):
    """Assign each chunk an id of the form ``"<source>:<page>:<index>"``.

    ``index`` counts consecutive chunks that share the same source and page,
    restarting at 0 whenever the page key changes. The id is written to
    ``chunk.metadata["id"]`` in place and the same ``chunks`` object is
    returned.
    """
    previous_page = None
    index_on_page = 0

    for chunk in chunks:
        meta = chunk.metadata
        page_key = f"{meta.get('source')}:{meta.get('page')}"

        index_on_page = index_on_page + 1 if page_key == previous_page else 0
        previous_page = page_key

        meta["id"] = f"{page_key}:{index_on_page}"

    return chunks
|
|
|
|
|
def _describe_image(media_url, chat_history):
    """Classify an uploaded image as disease or pest and build the reply text."""
    filepath = convert_img(media_url, account_sid, auth_token)
    if not filepath:
        # Download or conversion failed, so there is nothing to classify.
        return "Please upload another image with good quality."

    try:
        disease = predict_disease(filepath)
    except Exception as err:
        print(f"Disease prediction failed: {err}")
        disease = None
    if disease:
        disease_info = generate_response(f"Provide brief information about {disease} in plants", chat_history)
        return f"Detected disease: {disease}\n\nAdditional information: {disease_info}"

    # The pest model is only consulted when no disease was found.
    try:
        pest = predict_pest(filepath)
    except Exception as err:
        print(f"Pest prediction failed: {err}")
        pest = None
    if pest:
        # predict_pest returns a whole prediction record; show only its class
        # label when one is present instead of the raw dict.
        if isinstance(pest, dict):
            pest = pest.get('class', pest)
        pest_info = generate_response(f"Provide brief information about {pest} in agriculture", chat_history)
        return f"Detected pest: {pest}\n\nAdditional information: {pest_info}"

    return "Please upload another image with good quality."


def _answer_from_document(media_url, question):
    """Index an uploaded document and, if a question came with it, answer it."""
    filepath = download_and_save_as_txt(media_url, account_sid, auth_token)
    if not filepath:
        return "Sorry, there was an error processing your query."

    # The document must be indexed before it can be queried. Previously the
    # file path itself was sent to query_rag as if it were the question.
    save_pdf_and_update_database(filepath)
    if question.strip():
        return query_rag(question)
    return "Your document has been saved. Ask a question containing 'from pdf' or 'in pdf' to query it."


@app.route('/whatsapp', methods=['POST'])
def whatsapp_webhook():
    """Handle an incoming Twilio WhatsApp message and send back a reply.

    Routing, in order: attached media (image -> disease/pest detection,
    anything else -> document indexing and RAG), weather keywords,
    "bookkeeping", market-rate keywords, news keywords, explicit PDF questions,
    and finally a plain chat completion. The exchange is appended to the shared
    conversation memory and the reply is sent through send_message.

    Returns:
        An empty body with HTTP status 204.
    """
    incoming_msg = request.values.get('Body', '').lower()
    sender = request.values.get('From')
    try:
        num_media = int(request.values.get('NumMedia', 0))
    except (TypeError, ValueError):
        num_media = 0

    chat_history = conversation_memory.get_memory()

    if num_media > 0:
        media_url = request.values.get('MediaUrl0')
        # Guard against a missing content type instead of failing on None.
        content_type = request.values.get('MediaContentType0') or ''
        if content_type.startswith('image/'):
            response_text = _describe_image(media_url, chat_history)
        else:
            response_text = _answer_from_document(media_url, incoming_msg)

    elif any(word in incoming_msg for word in ('weather', 'climate', 'temperature')):
        try:
            response_text = get_weather(incoming_msg)
        except Exception as err:
            # A scraping failure should not turn into an HTTP 500.
            print(f"Error fetching weather: {err}")
            response_text = "Sorry, I could not fetch the weather right now."

    elif 'bookkeeping' in incoming_msg:
        response_text = "Please provide the details you'd like to record."

    # 'rate' and 'price' also match 'rates' and 'prices'.
    elif any(word in incoming_msg for word in ('rate', 'price', 'market')):
        rates = get_rates() or "unavailable"
        response_text = generate_response(f"{incoming_msg} data is {rates}", chat_history)

    elif 'news' in incoming_msg or 'information' in incoming_msg:
        news = get_news()
        response_text = generate_response(incoming_msg + ' data is ' + str(news), chat_history)

    elif 'from pdf' in incoming_msg or 'in pdf' in incoming_msg:
        response_text = query_rag(incoming_msg)

    else:
        response_text = generate_response(incoming_msg, chat_history)

    conversation_memory.add_to_memory({"user": incoming_msg, "assistant": response_text})
    send_message(sender, response_text)
    return '', 204
|
|
|
|
|
def send_message(to, body):
    """Send ``body`` to ``to`` from the configured WhatsApp sender number.

    The message SID is printed on success; any exception is printed instead of
    being raised.
    """
    try:
        sent = client.messages.create(
            to=to,
            body=body,
            from_=from_whatsapp_number,
        )
        print(f"Message sent with SID: {sent.sid}")
    except Exception as e:
        print(f"Error sending message: {e}")
|
|
|
def send_initial_message(to_number):
    """Send the welcome text to ``to_number`` (prefixed with ``whatsapp:``)."""
    recipient = f'whatsapp:{to_number}'
    greeting = 'Welcome to the Agri AI Chatbot! How can I assist you today? You can send an image with "pest" or "disease" to classify it.'
    send_message(recipient, greeting)
|
if __name__ == "__main__":
    # Greet the two hard-coded numbers, then start serving on port 7860.
    for number in ('919080522395', '916382792828'):
        send_initial_message(number)
    app.run(host='0.0.0.0', port=7860)
|
|