File size: 3,823 Bytes
052e52f
0fd9053
1d239e0
 
 
0fd9053
1d239e0
 
 
573cef7
1d239e0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c36a14b
0fd9053
d9a1f2d
 
0fd9053
 
 
 
1d239e0
d9a1f2d
0fd9053
 
 
 
 
d9a1f2d
0fd9053
1d239e0
 
c36a14b
1d239e0
04f308f
 
 
1d239e0
c36a14b
1d239e0
 
 
 
 
 
 
0fd9053
c36a14b
558f5d1
 
052e52f
558f5d1
 
 
 
 
 
 
 
a8f0234
1d239e0
a8f0234
1d239e0
 
 
 
 
558f5d1
1d239e0
 
558f5d1
c36a14b
 
 
 
a8f0234
05b09c6
691414c
1a1cf31
c36a14b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import os

# Directory where the Chroma vector store persists its data.
CHROMA_PATH = '/code/chroma_db'
# exist_ok=True creates the directory atomically, avoiding the
# check-then-create race of `if not os.path.exists(...)`.
os.makedirs(CHROMA_PATH, exist_ok=True)
from langchain.vectorstores.chroma import Chroma
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Load and process the PDF
def save_pdf_and_update_database(pdf_filepath):
    """Ingest a PDF into the persistent Chroma vector store.

    The file at *pdf_filepath* is loaded page-by-page, split into
    overlapping text chunks, embedded with HuggingFace embeddings, and
    appended to the Chroma collection under CHROMA_PATH.  Ingestion is
    best-effort: failures are reported to stdout, not raised.
    """
    try:
        pages = PyPDFLoader(pdf_filepath).load()

        # Small chunks suit retrieval; the overlap preserves context
        # across chunk boundaries.
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=800,
            chunk_overlap=80,
            length_function=len,
            is_separator_regex=False,
        )
        pieces = splitter.split_documents(pages)

        # Open (or create) the persistent store and append the new chunks.
        store = Chroma(
            persist_directory=CHROMA_PATH,
            embedding_function=HuggingFaceEmbeddings(),
        )
        store.add_documents(pieces)
        store.persist()
        print("PDF processed and data updated in Chroma.")
    except Exception as e:
        print(f"Error processing PDF: {e}")

# API key for the AI71 Falcon service, injected via the environment;
# None when the variable is unset — TODO confirm callers handle that.
AI71_API_KEY = os.environ.get('AI71_API_KEY')

def generate_response(query, chat_history):
    """Stream a chat completion from Falcon-180B and return the full text.

    The prompt embeds *chat_history* so the model can answer in context.
    Streaming deltas are accumulated, then model artifacts ("###" markers
    and stray "\\nUser:" turns) are stripped from the final string.

    NOTE(review): `AI71` has no visible import in this file — confirm it
    is brought into scope elsewhere (e.g. `from ai71 import AI71`).
    """
    client = AI71(AI71_API_KEY)
    stream = client.chat.completions.create(
        model="tiiuae/falcon-180b-chat",
        messages=[
            {"role": "system", "content": "You are the best agricultural assistant. Remember to give a response in not more than 2 sentences."},
            {"role": "user", "content": f'''Answer the query based on history {chat_history}: {query}'''},
        ],
        stream=True,
    )
    parts = []
    for chunk in stream:
        delta = chunk.choices[0].delta.content
        if delta:
            parts.append(delta)
    return ''.join(parts).replace("###", '').replace('\nUser:', '')

def query_rag(query_text: str, chat_history):
    """Answer *query_text* with retrieval-augmented generation.

    Retrieves the 5 most similar chunks from the Chroma store, joins
    them into a context section, and asks the Falcon model to answer
    the question against that context.  Returns a canned apology when
    the search yields nothing.
    """
    store = Chroma(persist_directory=CHROMA_PATH, embedding_function=HuggingFaceEmbeddings())

    # Top-5 nearest chunks by embedding similarity (score is unused).
    matches = store.similarity_search_with_score(query_text, k=5)
    if not matches:
        return "Sorry, I couldn't find any relevant information."

    context_text = "\n\n---\n\n".join(doc.page_content for doc, _score in matches)

    prompt = f"Context:\n{context_text}\n\nQuestion:\n{query_text}"
    return generate_response(prompt, chat_history)


@app.route('/whatsapp', methods=['POST'])
def whatsapp_webhook():
    """Handle an inbound WhatsApp message relayed by Twilio.

    A PDF attachment is ingested into the vector store; any other
    attachment type is rejected with an explanatory message; plain text
    is answered via RAG.  The exchange is appended to conversation
    memory and the reply is sent back to the sender.
    """
    incoming_msg = request.values.get('Body', '').lower()
    sender = request.values.get('From')
    media_count = int(request.values.get('NumMedia', 0))

    chat_history = conversation_memory.get_memory()

    if media_count > 0:
        # Attachment path: only PDF uploads are accepted.
        if request.values.get('MediaContentType0') == 'application/pdf':
            pdf_path = download_file(request.values.get('MediaUrl0'), ".pdf")
            save_pdf_and_update_database(pdf_path)
            response_text = "PDF has been processed. You can now ask questions related to its content."
        else:
            response_text = "Unsupported file type. Please upload a PDF document."
    else:
        # Text path: answer the query against the indexed documents.
        response_text = query_rag(incoming_msg, chat_history)

    conversation_memory.add_to_memory({"user": incoming_msg, "assistant": response_text})
    send_message(sender, response_text)
    # Twilio only needs an empty acknowledgement.
    return '', 204


if __name__ == "__main__":
    # Greet the two pre-registered recipients, then serve the webhook.
    for recipient in ('919080522395', '916382792828'):
        send_initial_message(recipient)
    app.run(host='0.0.0.0', port=7860)