File size: 6,326 Bytes
940c98a
052e52f
940c98a
 
 
 
 
96fe0c0
 
f4738b1
940c98a
 
 
 
c8af05e
1d239e0
 
fa7d405
f4738b1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fa7d405
f85bc8f
 
fa7d405
f85bc8f
 
 
 
 
 
 
 
 
 
940c98a
 
6ac9478
 
 
 
940c98a
 
 
c8af05e
940c98a
 
 
 
 
 
fa7d405
f4738b1
 
 
 
fa7d405
f4738b1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1d239e0
f4738b1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1d239e0
 
0fd9053
940c98a
558f5d1
 
052e52f
558f5d1
 
 
f4738b1
 
558f5d1
 
 
a8f0234
1d239e0
 
f4738b1
 
1d239e0
 
558f5d1
f4738b1
 
558f5d1
f4738b1
c36a14b
 
fa7d405
 
940c98a
 
 
 
 
 
 
 
 
 
d3d3acb
 
 
 
 
fa7d405
05b09c6
691414c
1a1cf31
c36a14b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
from flask import Flask, request
import os
from langchain.vectorstores import Chroma
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
import requests
from twilio.rest import Client


# Flask application that serves the WhatsApp webhook.
app = Flask(__name__)

# Directory in which Chroma persists its vector store.
CHROMA_PATH = '/code/chroma_db'
# exist_ok=True makes this a no-op when the directory already exists and
# avoids the check-then-create race of a separate os.path.exists() test.
os.makedirs(CHROMA_PATH, exist_ok=True)

from ai71 import AI71

def generate_response(query, chat_history):
    """Ask the Falcon chat model (through the AI71 client) to answer a query.

    Args:
        query: Text of the question; interpolated into the user message.
        chat_history: Earlier conversation; interpolated verbatim into the
            user message.

    Returns:
        The streamed reply joined into a single string, with "###" and
        newline-prefixed "User:" markers removed. If anything raises, a
        fixed error sentence is returned instead.
    """
    try:
        conversation = [
            {"role": "system", "content": "You are the best agricultural assistant. Remember to give a response in not more than 2 sentences."},
            {"role": "user", "content": f"Answer the query based on history {chat_history}: {query}"},
        ]
        stream = AI71(api_key=AI71_API_KEY).chat.completions.create(
            model="tiiuae/falcon-180b-chat",
            messages=conversation,
            stream=True,
        )

        # Collect only the chunks that actually carry text.
        pieces = []
        for part in stream:
            fragment = part.choices[0].delta.content
            if fragment:
                pieces.append(fragment)
        reply = "".join(pieces)

        # Strip the markers the model sometimes leaves in its output.
        for marker in ("###", '\nUser:'):
            reply = reply.replace(marker, '')
        return reply
    except Exception as e:
        print(f"Error generating response: {e}")
        return "An error occurred while generating the response."


# Initialize ChromaDB
def initialize_chroma():
    """Open the persisted Chroma store and run a single probe search.

    Any exception is caught and printed; the function returns nothing.
    """
    try:
        embedder = HuggingFaceEmbeddings()
        store = Chroma(
            persist_directory=CHROMA_PATH,
            embedding_function=embedder,
        )
        # Throwaway search so that setup problems surface right here.
        store.similarity_search_with_score("test query", k=1)
        print("Chroma initialized successfully.")
    except Exception as e:
        print(f"Error initializing Chroma: {e}")

# Probe the vector store once when the module is loaded; failures are only
# printed by initialize_chroma, they do not stop start-up.
initialize_chroma()

# Credentials are read from the environment; os.environ.get yields None for
# any variable that is not set.
AI71_API_KEY = os.environ.get('AI71_API_KEY')
account_sid = os.environ.get('TWILIO_ACCOUNT_SID')
auth_token = os.environ.get('TWILIO_AUTH_TOKEN')
# Twilio REST client used by send_message.
client = Client(account_sid, auth_token)
# Sender address used for every outgoing WhatsApp message.
from_whatsapp_number = 'whatsapp:+14155238886'

# Download file utility
def download_file(url, ext, timeout=30):
    """Download ``url`` to ``/code/uploaded_file<ext>`` and return that path.

    Args:
        url: Location of the file to fetch.
        ext: File extension including the leading dot (e.g. ``".pdf"``).
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The local path the response body was written to.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        OSError: If the local file cannot be written.
    """
    local_filename = f'/code/uploaded_file{ext}'
    # The timeout keeps a stalled server from blocking the caller forever.
    with requests.get(url, stream=True, timeout=timeout) as r:
        # Fail before writing so an error page is never saved as the file.
        r.raise_for_status()
        with open(local_filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)
    return local_filename

# Process PDF and return text


import fitz  # PyMuPDF

def extract_text_from_pdf(pdf_filepath):
    """Return the concatenated text of every page of a PDF.

    Args:
        pdf_filepath: Path of the PDF file to read.

    Returns:
        The text of all pages joined in page order, or ``None`` if the file
        cannot be opened or a page cannot be read (the error is printed).
    """
    try:
        # The context manager closes the document even when reading a page
        # raises, so the file handle is not leaked on the error path.
        with fitz.open(pdf_filepath) as pdf_document:
            return ''.join(page.get_text() for page in pdf_document)
    except Exception as e:
        print(f"Error extracting text from PDF: {e}")
        return None

def query_rag(query_text: str, chat_history):
    """Answer ``query_text`` using the closest stored chunks as context.

    Args:
        query_text: The user's question.
        chat_history: Earlier conversation, forwarded to generate_response.

    Returns:
        The generated answer, an apology when the search returns nothing, or
        a fixed error sentence when anything raises.
    """
    try:
        embedder = HuggingFaceEmbeddings()
        store = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedder)

        matches = store.similarity_search_with_score(query_text, k=5)
        if matches:
            # Scores are not used; only the chunk text goes into the prompt.
            passages = [document.page_content for document, _ in matches]
            context_text = "\n\n---\n\n".join(passages)
            return generate_response(
                f"Context:\n{context_text}\n\nQuestion:\n{query_text}",
                chat_history,
            )

        return "Sorry, I couldn't find any relevant information."
    except Exception as e:
        print(f"Error querying RAG system: {e}")
        return "An error occurred while querying the RAG system."


def save_pdf_and_update_database(pdf_filepath):
    """Extract a PDF's text, split it into chunks and index them in Chroma.

    Problems are printed rather than raised, so the function always returns
    ``None``.

    Args:
        pdf_filepath: Path of the PDF file to ingest.
    """
    try:
        text = extract_text_from_pdf(pdf_filepath)
        if not text:
            print("Error extracting text from PDF.")
            return

        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=800,
            chunk_overlap=80,
            length_function=len,
            is_separator_regex=False,
        )
        chunks = text_splitter.split_text(text)
        if not chunks:
            # Nothing to embed; skip the vector-store round trip.
            print("No text chunks produced from PDF; nothing to index.")
            return

        embedding_function = HuggingFaceEmbeddings()
        db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)

        # split_text returns plain strings, so they are stored with add_texts;
        # add_documents expects Document objects and fails on str input.
        db.add_texts(chunks)
        db.persist()
        print("PDF processed and data updated in Chroma.")
    except Exception as e:
        print(f"Error processing PDF: {e}")

# Flask route to handle WhatsApp webhook
@app.route('/whatsapp', methods=['POST'])
def whatsapp_webhook():
    """Handle an incoming WhatsApp message posted to the webhook.

    A PDF attachment is downloaded and indexed; any other attachment type is
    rejected; a message without media is answered through query_rag. The
    reply is delivered with send_message.

    Returns:
        An empty body with HTTP status 204.
    """
    incoming_msg = request.values.get('Body', '').lower()
    sender = request.values.get('From')
    try:
        num_media = int(request.values.get('NumMedia', 0))
    except ValueError:
        # A malformed counter is treated as "no attachment" instead of
        # failing the whole request.
        num_media = 0

    chat_history = []  # You need to handle chat history appropriately

    if num_media > 0:
        media_url = request.values.get('MediaUrl0')
        content_type = request.values.get('MediaContentType0')

        if content_type == 'application/pdf':
            try:
                filepath = download_file(media_url, ".pdf")
            except (requests.RequestException, OSError) as e:
                # Tell the user instead of letting the request die with a 500.
                print(f"Error downloading media: {e}")
                response_text = "Sorry, the file could not be downloaded. Please try sending it again."
            else:
                save_pdf_and_update_database(filepath)
                response_text = "PDF has been processed. You can now ask questions related to its content."
        else:
            response_text = "Unsupported file type. Please upload a PDF document."
    else:
        # Use RAG to generate a response based on the query
        response_text = query_rag(incoming_msg, chat_history)

    # Send the response back to the sender
    send_message(sender, response_text)
    return '', 204

# Function to send message
def send_message(to, body):
    """Send ``body`` to the address ``to`` through the module's Twilio client.

    Args:
        to: Recipient address passed to Twilio as ``to``.
        body: Text of the message.

    Failures are printed and never raised; nothing is returned.
    """
    try:
        payload = {"from_": from_whatsapp_number, "body": body, "to": to}
        sent = client.messages.create(**payload)
        print(f"Message sent with SID: {sent.sid}")
    except Exception as e:
        print(f"Error sending message: {e}")
def send_initial_message(to_number):
    """Send the welcome message to ``to_number`` over WhatsApp.

    Args:
        to_number: Recipient phone number, with or without the leading ``+``
            of E.164 notation.
    """
    # Twilio WhatsApp addresses use E.164 ("whatsapp:+<country><number>");
    # add the "+" when the caller left it out.
    number = str(to_number).strip()
    if not number.startswith('+'):
        number = f'+{number}'
    # NOTE(review): the text below advertises image classification, but the
    # webhook in this file only accepts PDF attachments — confirm the wording.
    send_message(
        f'whatsapp:{number}',
        'Welcome to the Agri AI Chatbot! How can I assist you today? You can send an image with "pest" or "disease" to classify it.'
    )

if __name__ == "__main__":
    # Greet each hard-coded recipient, then start serving the webhook.
    for recipient in ('919080522395', '916382792828'):
        send_initial_message(recipient)
    app.run(host='0.0.0.0', port=7860)