import os

from flask import Flask, request
from ai71 import AI71
from langchain.vectorstores.chroma import Chroma
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter

app = Flask(__name__)

# Directory where the Chroma vector store is persisted.
CHROMA_PATH = '/code/chroma_db'
os.makedirs(CHROMA_PATH, exist_ok=True)
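
# --- Assumed helpers -------------------------------------------------------
# conversation_memory, download_file, send_message and send_initial_message
# are used below but are not defined in this section. The sketches marked as
# assumptions in this file fill them in so the app runs end to end; they are
# not necessarily the original implementations.

# A minimal in-process chat memory keeping the most recent turns.
class ConversationMemory:
    def __init__(self, max_turns=10):
        self.max_turns = max_turns
        self.turns = []

    def get_memory(self):
        return self.turns

    def add_to_memory(self, turn):
        self.turns.append(turn)
        # Drop the oldest turns so the prompt stays small.
        self.turns = self.turns[-self.max_turns:]

conversation_memory = ConversationMemory()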

# Load an uploaded PDF, split it into chunks, and index the chunks in Chroma.
def save_pdf_and_update_database(pdf_filepath):
    try:
        # Load the PDF
        document_loader = PyPDFLoader(pdf_filepath)
        documents = document_loader.load()

        # Split the documents into manageable chunks
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=800,
            chunk_overlap=80,
            length_function=len,
            is_separator_regex=False,
        )
        chunks = text_splitter.split_documents(documents)

        # Initialize Chroma with an embedding function
        embedding_function = HuggingFaceEmbeddings()
        db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)

        # Add chunks to ChromaDB
        db.add_documents(chunks)
        db.persist()
        print("PDF processed and data updated in Chroma.")
    except Exception as e:
        print(f"Error processing PDF: {e}")

# API key for the AI71 Falcon chat endpoint.
AI71_API_KEY = os.environ.get('AI71_API_KEY')

# Stream a chat completion for `query` from Falcon-180B via the AI71 API,
# conditioning the model on the running chat history.
def generate_response(query, chat_history):
    response = ''
    for chunk in AI71(AI71_API_KEY).chat.completions.create(
        model="tiiuae/falcon-180b-chat",
        messages=[
            {"role": "system", "content": "You are the best agricultural assistant. Remember to give a response in not more than 2 sentences."},
            {"role": "user", "content": f"Answer the query based on history {chat_history}: {query}"},
        ],
        stream=True,
    ):
        if chunk.choices[0].delta.content:
            response += chunk.choices[0].delta.content
    # Strip stray markdown headers and echoed "User:" turns from the reply.
    return response.replace("###", '').replace('\nUser:', '')

# Retrieve the most relevant chunks from Chroma and answer from that context.
def query_rag(query_text: str, chat_history):
    db = Chroma(persist_directory=CHROMA_PATH, embedding_function=HuggingFaceEmbeddings())

    # Perform a similarity search in ChromaDB
    results = db.similarity_search_with_score(query_text, k=5)
    if not results:
        return "Sorry, I couldn't find any relevant information."
    context_text = "\n\n---\n\n".join([doc.page_content for doc, _score in results])

    # Generate the response using the Falcon model
    prompt = f"Context:\n{context_text}\n\nQuestion:\n{query_text}"
    return generate_response(prompt, chat_history)
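
# Assumed sketches of the outbound WhatsApp helpers, built on the Twilio
# REST client. TWILIO_WHATSAPP_NUMBER (defaulting to the Twilio sandbox
# number) and the greeting text are assumptions.
from twilio.rest import Client

twilio_client = Client(os.environ.get('TWILIO_ACCOUNT_SID'), os.environ.get('TWILIO_AUTH_TOKEN'))

def send_message(to, body):
    # Twilio expects WhatsApp addresses in the form 'whatsapp:+<number>'.
    twilio_client.messages.create(
        from_=os.environ.get('TWILIO_WHATSAPP_NUMBER', 'whatsapp:+14155238886'),
        to=to,
        body=body,
    )

def send_initial_message(phone_number):
    send_message(
        f'whatsapp:+{phone_number}',
        'Hi! Ask me agriculture questions, or upload a PDF to add it to my knowledge base.',
    )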

# Twilio WhatsApp webhook: a PDF attachment updates the knowledge base,
# anything else is treated as a question and answered via RAG.
@app.route('/whatsapp', methods=['POST'])
def whatsapp_webhook():
    incoming_msg = request.values.get('Body', '').lower()
    sender = request.values.get('From')
    num_media = int(request.values.get('NumMedia', 0))
    chat_history = conversation_memory.get_memory()

    if num_media > 0:
        media_url = request.values.get('MediaUrl0')
        content_type = request.values.get('MediaContentType0')
        if content_type == 'application/pdf':
            # Handle PDF processing
            filepath = download_file(media_url, ".pdf")
            save_pdf_and_update_database(filepath)
            response_text = "PDF has been processed. You can now ask questions related to its content."
        else:
            response_text = "Unsupported file type. Please upload a PDF document."
    else:
        # Handle queries
        response_text = query_rag(incoming_msg, chat_history)

    conversation_memory.add_to_memory({"user": incoming_msg, "assistant": response_text})
    send_message(sender, response_text)
    return '', 204
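
# Local smoke test (simulates a Twilio webhook POST; field names follow
# Twilio's webhook format):
#   curl -X POST http://localhost:7860/whatsapp \
#        -d 'Body=how do I treat leaf rust on wheat?' \
#        -d 'From=whatsapp:+10000000000' -d 'NumMedia=0'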

if __name__ == "__main__":
    # Greet the two pre-registered numbers, then start the Flask server.
    send_initial_message('919080522395')
    send_initial_message('916382792828')
    app.run(host='0.0.0.0', port=7860)