import os

import requests
from flask import Flask, request
from twilio.rest import Client
from ai71 import AI71
from langchain.vectorstores.chroma import Chroma
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter

app = Flask(__name__)

# Directory where the Chroma vector store is persisted.
CHROMA_PATH = '/code/chroma_db'
os.makedirs(CHROMA_PATH, exist_ok=True)

AI71_API_KEY = os.environ.get('AI71_API_KEY')

# Twilio credentials for the WhatsApp sandbox. These environment variable
# names are assumptions; use whatever your deployment defines.
TWILIO_ACCOUNT_SID = os.environ.get('TWILIO_ACCOUNT_SID')
TWILIO_AUTH_TOKEN = os.environ.get('TWILIO_AUTH_TOKEN')
TWILIO_WHATSAPP_NUMBER = os.environ.get('TWILIO_WHATSAPP_NUMBER')  # e.g. 'whatsapp:+14155238886'

twilio_client = Client(TWILIO_ACCOUNT_SID, TWILIO_AUTH_TOKEN)


def save_pdf_and_update_database(pdf_filepath):
    """Load a PDF, split it into chunks, and add the chunks to Chroma."""
    try:
        # Load the PDF into LangChain documents (one per page).
        document_loader = PyPDFLoader(pdf_filepath)
        documents = document_loader.load()

        # Split the documents into manageable, slightly overlapping chunks.
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=800,
            chunk_overlap=80,
            length_function=len,
            is_separator_regex=False,
        )
        chunks = text_splitter.split_documents(documents)

        # Initialize Chroma with an embedding function and persist the chunks.
        embedding_function = HuggingFaceEmbeddings()
        db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)
        db.add_documents(chunks)
        db.persist()
        print("PDF processed and data updated in Chroma.")
    except Exception as e:
        print(f"Error processing PDF: {e}")


def generate_response(query, chat_history):
    """Stream a short answer from Falcon-180B via the AI71 API."""
    response = ''
    for chunk in AI71(AI71_API_KEY).chat.completions.create(
        model="tiiuae/falcon-180b-chat",
        messages=[
            {"role": "system", "content": "You are the best agricultural assistant. Remember to give a response in not more than 2 sentences."},
            {"role": "user", "content": f"Answer the query based on history {chat_history}: {query}"},
        ],
        stream=True,
    ):
        if chunk.choices[0].delta.content:
            response += chunk.choices[0].delta.content
    # Strip formatting artifacts the model sometimes emits.
    return response.replace("###", '').replace('\nUser:', '')


def query_rag(query_text: str, chat_history):
    """Answer a question with retrieval-augmented generation over Chroma."""
    db = Chroma(persist_directory=CHROMA_PATH, embedding_function=HuggingFaceEmbeddings())

    # Perform a similarity search in ChromaDB for the five closest chunks.
    results = db.similarity_search_with_score(query_text, k=5)
    if not results:
        return "Sorry, I couldn't find any relevant information."
    context_text = "\n\n---\n\n".join([doc.page_content for doc, _score in results])

    # Generate the response using the Falcon model, grounded in the retrieved context.
    prompt = f"Context:\n{context_text}\n\nQuestion:\n{query_text}"
    return generate_response(prompt, chat_history)


# The helpers below were referenced but not defined in this section; these are
# minimal hedged sketches so the file runs end to end. Replace them with your
# own implementations if you already have them elsewhere.

class ConversationMemory:
    """Minimal in-memory chat history (a sketch; not persisted across restarts)."""

    def __init__(self):
        self.history = []

    def get_memory(self):
        return self.history

    def add_to_memory(self, exchange):
        self.history.append(exchange)


conversation_memory = ConversationMemory()


def download_file(media_url, extension):
    """Download a Twilio-hosted media file to a local path.

    Twilio media URLs require HTTP basic auth with the account SID and token.
    The /tmp destination path is an assumption for this sketch.
    """
    resp = requests.get(media_url, auth=(TWILIO_ACCOUNT_SID, TWILIO_AUTH_TOKEN))
    resp.raise_for_status()
    filepath = f"/tmp/upload{extension}"
    with open(filepath, 'wb') as f:
        f.write(resp.content)
    return filepath


def send_message(to, body):
    """Send a WhatsApp message through Twilio's REST API."""
    twilio_client.messages.create(from_=TWILIO_WHATSAPP_NUMBER, body=body, to=to)


def send_initial_message(phone_number):
    """Greet a user when the app starts (greeting text is an assumption)."""
    send_message(
        f'whatsapp:+{phone_number}',
        'Hi! Send me a PDF to index, or ask me any agriculture question.',
    )


@app.route('/whatsapp', methods=['POST'])
def whatsapp_webhook():
    incoming_msg = request.values.get('Body', '').lower()
    sender = request.values.get('From')
    num_media = int(request.values.get('NumMedia', 0))
    chat_history = conversation_memory.get_memory()

    if num_media > 0:
        media_url = request.values.get('MediaUrl0')
        content_type = request.values.get('MediaContentType0')
        if content_type == 'application/pdf':
            # Index the uploaded PDF so later questions can draw on its content.
            filepath = download_file(media_url, ".pdf")
            save_pdf_and_update_database(filepath)
            response_text = "PDF has been processed. You can now ask questions related to its content."
        else:
            response_text = "Unsupported file type. Please upload a PDF document."
    else:
        # Plain text message: answer it with RAG over the indexed documents.
        response_text = query_rag(incoming_msg, chat_history)

    conversation_memory.add_to_memory({"user": incoming_msg, "assistant": response_text})
    send_message(sender, response_text)
    # The reply goes out via the REST API above, so the webhook itself
    # returns an empty 204 rather than a TwiML response.
    return '', 204


if __name__ == "__main__":
    send_initial_message('919080522395')
    send_initial_message('916382792828')
    app.run(host='0.0.0.0', port=7860)
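
# Local testing sketch. Twilio delivers webhooks as form-encoded POSTs with
# fields like Body, From, and NumMedia (all used above), so you can simulate a
# message without Twilio at all. Assumptions: the app is running on
# localhost:7860 and the phone number below is a placeholder.
#
#   curl -X POST http://localhost:7860/whatsapp \
#        -d 'Body=How do I treat leaf rust in wheat?' \
#        -d 'From=whatsapp:+10000000000' \
#        -d 'NumMedia=0'
#
# To let Twilio reach the webhook during development, a tunnel such as ngrok
# can forward a public URL to port 7860.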
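
# Vector-store sanity check: a sketch for inspecting what query_rag retrieves,
# run in a separate Python shell against the same CHROMA_PATH and embeddings
# as above (the query string is just an example).
#
#   from langchain.vectorstores.chroma import Chroma
#   from langchain.embeddings import HuggingFaceEmbeddings
#   db = Chroma(persist_directory='/code/chroma_db',
#               embedding_function=HuggingFaceEmbeddings())
#   for doc, score in db.similarity_search_with_score('soil preparation', k=3):
#       print(round(score, 3), doc.page_content[:80])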