File size: 3,823 Bytes
052e52f 0fd9053 1d239e0 0fd9053 1d239e0 573cef7 1d239e0 c36a14b 0fd9053 d9a1f2d 0fd9053 1d239e0 d9a1f2d 0fd9053 d9a1f2d 0fd9053 1d239e0 c36a14b 1d239e0 04f308f 1d239e0 c36a14b 1d239e0 0fd9053 c36a14b 558f5d1 052e52f 558f5d1 a8f0234 1d239e0 a8f0234 1d239e0 558f5d1 1d239e0 558f5d1 c36a14b a8f0234 05b09c6 691414c 1a1cf31 c36a14b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 |
import os

# Directory where the Chroma vector store persists its index between runs.
CHROMA_PATH = '/code/chroma_db'
# exist_ok=True avoids the TOCTOU race between an exists() check and makedirs().
os.makedirs(CHROMA_PATH, exist_ok=True)

from langchain.vectorstores.chroma import Chroma
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Load and process the PDF
def save_pdf_and_update_database(pdf_filepath):
    """Load a PDF, split it into chunks, and persist them to the Chroma store.

    Args:
        pdf_filepath: Path to the PDF file on local disk.

    Returns:
        True if the PDF was chunked and indexed successfully, False otherwise.
        (Previously returned None in all cases, so callers ignoring the result
        are unaffected; new callers can detect failures.)
    """
    try:
        # Load the PDF into page-level documents.
        documents = PyPDFLoader(pdf_filepath).load()
        # Split the documents into overlapping chunks sized for embedding.
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=800,
            chunk_overlap=80,
            length_function=len,
            is_separator_regex=False,
        )
        chunks = text_splitter.split_documents(documents)
        # Initialize Chroma with an embedding function and add the chunks.
        embedding_function = HuggingFaceEmbeddings()
        db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)
        db.add_documents(chunks)
        db.persist()
        print("PDF processed and data updated in Chroma.")
        return True
    except Exception as e:
        # Boundary handler: report and signal failure instead of crashing the
        # webhook that invoked us.
        print(f"Error processing PDF: {e}")
        return False
# API key for the AI71 inference service; None if the env var is unset.
AI71_API_KEY = os.environ.get('AI71_API_KEY')


def generate_response(query, chat_history):
    """Stream a chat completion from Falcon-180B and return the full text.

    Args:
        query: The user's prompt (may already embed retrieved context).
        chat_history: Prior conversation turns, interpolated into the prompt.

    Returns:
        The assembled response with "###" markers and trailing "\\nUser:"
        artifacts stripped.
    """
    # NOTE(review): `AI71` is not imported anywhere in this file — confirm
    # `from ai71 import AI71` exists elsewhere, otherwise this raises
    # NameError at call time.
    parts = []
    stream = AI71(AI71_API_KEY).chat.completions.create(
        model="tiiuae/falcon-180b-chat",
        messages=[
            {"role": "system", "content": "You are the best agricultural assistant. Remember to give a response in not more than 2 sentences."},
            {"role": "user", "content": f'''Answer the query based on history {chat_history}: {query}'''},
        ],
        stream=True,
    )
    for chunk in stream:
        # Streamed deltas can be None/empty (keep-alive chunks); skip those.
        delta = chunk.choices[0].delta.content
        if delta:
            parts.append(delta)
    # join() is linear; repeated += concatenation is potentially quadratic.
    return "".join(parts).replace("###", '').replace('\nUser:', '')
def query_rag(query_text: str, chat_history):
    """Answer a query using retrieval-augmented generation.

    Runs a similarity search over the persisted Chroma store, builds a
    context-plus-question prompt from the top matches, and delegates the
    final answer to the Falcon model.

    Args:
        query_text: The user's question.
        chat_history: Prior conversation turns forwarded to the model.

    Returns:
        The model's response, or a fallback message when nothing matches.
    """
    embedding_function = HuggingFaceEmbeddings()
    db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)
    # Perform a similarity search in ChromaDB
    results = db.similarity_search_with_score(query_text, k=5)
    if not results:
        return "Sorry, I couldn't find any relevant information."
    # Scores are not used for the answer; only the chunk texts matter here.
    context_text = "\n\n---\n\n".join(doc.page_content for doc, _score in results)
    # Generate the response using the Falcon model
    prompt = f"Context:\n{context_text}\n\nQuestion:\n{query_text}"
    return generate_response(prompt, chat_history)
@app.route('/whatsapp', methods=['POST'])
def whatsapp_webhook():
    """Twilio-style WhatsApp webhook.

    If the inbound message carries a PDF attachment, download and index it
    into the Chroma store; otherwise treat the message body as a query and
    answer it via RAG. Always records the exchange in conversation memory
    and sends the reply back to the sender.

    Returns:
        An empty body with HTTP 204 (the reply is sent out-of-band).
    """
    incoming_msg = request.values.get('Body', '').lower()
    sender = request.values.get('From')
    try:
        num_media = int(request.values.get('NumMedia', 0))
    except (TypeError, ValueError):
        # Malformed NumMedia from the provider — treat as "no attachments"
        # instead of raising a 500 out of the webhook.
        num_media = 0
    chat_history = conversation_memory.get_memory()
    if num_media > 0:
        media_url = request.values.get('MediaUrl0')
        content_type = request.values.get('MediaContentType0')
        if content_type == 'application/pdf':
            # Handle PDF processing
            filepath = download_file(media_url, ".pdf")
            save_pdf_and_update_database(filepath)
            response_text = "PDF has been processed. You can now ask questions related to its content."
        else:
            response_text = "Unsupported file type. Please upload a PDF document."
    else:
        # Handle queries
        response_text = query_rag(incoming_msg, chat_history)
    conversation_memory.add_to_memory({"user": incoming_msg, "assistant": response_text})
    send_message(sender, response_text)
    return '', 204
if __name__ == "__main__":
    # Notify the pre-registered recipients, then start the Flask server.
    for recipient in ('919080522395', '916382792828'):
        send_initial_message(recipient)
    app.run(host='0.0.0.0', port=7860)