Spaces:

Neurolingua
/

AgriChatbot

Sleeping

App Files Files Community

Neurolingua committed on Aug 22, 2024

Commit

f4738b1

verified ·

1 Parent(s): d3d3acb

Update app.py

Browse files

Files changed (1) hide show

app.py +94 -8

app.py CHANGED Viewed

@@ -7,6 +7,7 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
 import requests
 from twilio.rest import Client
 # Flask app
 app = Flask(__name__)
@@ -15,6 +16,35 @@ CHROMA_PATH = '/code/chroma_db'
 if not os.path.exists(CHROMA_PATH):
     os.makedirs(CHROMA_PATH)
 # Initialize ChromaDB
 def initialize_chroma():
     try:
@@ -46,15 +76,67 @@ def download_file(url, ext):
     return local_filename
 # Process PDF and return text
 def extract_text_from_pdf(pdf_filepath):
     try:
-        document_loader = PyPDFLoader(pdf_filepath)
-        documents = document_loader.load()
-        text = "\n\n".join([doc.page_content for doc in documents])
-        return text
     except Exception as e:
         print(f"Error processing PDF: {e}")
-        return "Error extracting text from PDF."
 # Flask route to handle WhatsApp webhook
 @app.route('/whatsapp', methods=['POST'])
@@ -63,19 +145,23 @@ def whatsapp_webhook():
     sender = request.values.get('From')
     num_media = int(request.values.get('NumMedia', 0))
     if num_media > 0:
         media_url = request.values.get('MediaUrl0')
         content_type = request.values.get('MediaContentType0')
         if content_type == 'application/pdf':
             filepath = download_file(media_url, ".pdf")
-            extracted_text = extract_text_from_pdf(filepath)
-            response_text = f"Here is the content of the PDF:\n\n{extracted_text}"
         else:
             response_text = "Unsupported file type. Please upload a PDF document."
     else:
-        response_text = "Please upload a PDF document."
     send_message(sender, response_text)
     return '', 204

 import requests
 from twilio.rest import Client
 # Flask app
 app = Flask(__name__)
 if not os.path.exists(CHROMA_PATH):
     os.makedirs(CHROMA_PATH)
+from ai71 import AI71
def generate_response(query, chat_history):
    """Ask the AI71 chat model to answer ``query`` and return the reply text.

    The request carries a fixed system prompt (agricultural assistant, at
    most two sentences) and a user message into which ``chat_history`` and
    ``query`` are interpolated. The completion is requested as a stream; the
    non-empty content pieces are concatenated in arrival order, and every
    "###" marker and every newline-plus-"User:" fragment is then removed.

    If an ``Exception`` is raised while building or consuming the request,
    it is printed and a fixed error sentence is returned instead.
    """
    try:
        system_message = {
            "role": "system",
            "content": "You are the best agricultural assistant. Remember to give a response in not more than 2 sentences.",
        }
        user_message = {
            "role": "user",
            "content": f"Answer the query based on history {chat_history}: {query}",
        }
        client = AI71(api_key=AI71_API_KEY)
        stream = client.chat.completions.create(
            model="tiiuae/falcon-180b-chat",
            messages=[system_message, user_message],
            stream=True,
        )
        # Streamed chunks may carry an empty delta; keep only real text.
        pieces = []
        for chunk in stream:
            piece = chunk.choices[0].delta.content
            if piece:
                pieces.append(piece)
        reply = "".join(pieces)
        # Clean up response text
        for unwanted in ("###", '\nUser:'):
            reply = reply.replace(unwanted, '')
        return reply
    except Exception as e:
        print(f"Error generating response: {e}")
        return "An error occurred while generating the response."
 # Initialize ChromaDB
 def initialize_chroma():
     try:
     return local_filename
 # Process PDF and return text
import fitz  # PyMuPDF


def extract_text_from_pdf(pdf_filepath):
    """Return the concatenated text of every page of the PDF at ``pdf_filepath``.

    Pages are read in document order and their text is joined without any
    separator. If opening the file or extracting a page raises an
    ``Exception``, the error is printed and ``None`` is returned.
    """
    try:
        # The context manager closes the document even when a page fails to
        # load or extract; an explicit close() after the loop would be
        # skipped on error and leave the file handle open.
        with fitz.open(pdf_filepath) as pdf_document:
            return "".join(page.get_text() for page in pdf_document)
    except Exception as e:
        print(f"Error extracting text from PDF: {e}")
        return None
def query_rag(query_text: str, chat_history):
    """Answer ``query_text`` using passages retrieved from the Chroma store.

    Opens the Chroma collection persisted at ``CHROMA_PATH`` with
    HuggingFace embeddings and fetches up to five scored matches for the
    query. When there are matches, their page contents are joined with a
    "---" separator into a context block, combined with the question, and
    passed to ``generate_response`` together with ``chat_history``; that
    function's return value is returned. When there are no matches, a fixed
    apology sentence is returned.

    If an ``Exception`` is raised along the way, it is printed and a fixed
    error sentence is returned instead.
    """
    try:
        embedder = HuggingFaceEmbeddings()
        store = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedder)
        matches = store.similarity_search_with_score(query_text, k=5)
        if matches:
            # Scores are not used; only the retrieved text feeds the prompt.
            passages = [document.page_content for document, _ in matches]
            context_text = "\n\n---\n\n".join(passages)
            return generate_response(
                f"Context:\n{context_text}\n\nQuestion:\n{query_text}",
                chat_history,
            )
        return "Sorry, I couldn't find any relevant information."
    except Exception as e:
        print(f"Error querying RAG system: {e}")
        return "An error occurred while querying the RAG system."
def save_pdf_and_update_database(pdf_filepath):
    """Extract a PDF's text, split it into chunks, and add them to Chroma.

    The text returned by ``extract_text_from_pdf`` is split into chunks of at
    most 800 characters with an 80-character overlap, and the chunks are
    added to the Chroma collection persisted at ``CHROMA_PATH`` using
    HuggingFace embeddings.

    Always returns ``None``. If no text could be extracted, a message is
    printed and nothing is stored; if an ``Exception`` is raised, it is
    printed and not propagated.
    """
    try:
        text = extract_text_from_pdf(pdf_filepath)
        if not text:
            print("Error extracting text from PDF.")
            return
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=800,
            chunk_overlap=80,
            length_function=len,
            is_separator_regex=False,
        )
        chunks = text_splitter.split_text(text)
        embedding_function = HuggingFaceEmbeddings()
        db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)
        # split_text() yields plain strings, so they must go through
        # add_texts(); add_documents() expects Document objects and would
        # raise on a str, leaving the PDF unindexed.
        db.add_texts(chunks)
        db.persist()
        print("PDF processed and data updated in Chroma.")
    except Exception as e:
        print(f"Error processing PDF: {e}")
 # Flask route to handle WhatsApp webhook
 @app.route('/whatsapp', methods=['POST'])
     sender = request.values.get('From')
     num_media = int(request.values.get('NumMedia', 0))
+    chat_history = []  # You need to handle chat history appropriately
     if num_media > 0:
         media_url = request.values.get('MediaUrl0')
         content_type = request.values.get('MediaContentType0')
         if content_type == 'application/pdf':
             filepath = download_file(media_url, ".pdf")
+            save_pdf_and_update_database(filepath)
+            response_text = "PDF has been processed. You can now ask questions related to its content."
         else:
             response_text = "Unsupported file type. Please upload a PDF document."
     else:
+        # Use RAG to generate a response based on the query
+        response_text = query_rag(incoming_msg, chat_history)
+    # Send the response back to the sender
     send_message(sender, response_text)
     return '', 204