# Import required libraries
import os
import logging
from pprint import pprint

import PyPDF2
import chainlit as cl
from dotenv import load_dotenv
from haystack import Document, Pipeline
from haystack.document_stores import InMemoryDocumentStore
from haystack.nodes import (
    AnswerParser,
    BM25Retriever,
    PreProcessor,
    PromptModel,
    PromptNode,
    PromptTemplate,
)

load_dotenv()
logging.basicConfig(level=logging.DEBUG)


# Function to extract text from a PDF
def extract_text_from_pdf(pdf_path):
    text = ""
    with open(pdf_path, "rb") as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        for page_num in range(len(pdf_reader.pages)):
            page = pdf_reader.pages[page_num]
            text += page.extract_text() or ""
    return text


# Extract text from the PDF file
pdf_file_path = "Data/MR. MPROFY.pdf"
pdf_text = extract_text_from_pdf(pdf_file_path)
if not pdf_text:
    raise ValueError("No text extracted from PDF.")

# Create a Haystack document
doc = Document(content=pdf_text, meta={"name": "MR. MPROFY"})

# Initialize Document Store
document_store = InMemoryDocumentStore(use_bm25=True)
document_store.write_documents([doc])

# Initialize Retriever
retriever = BM25Retriever(document_store=document_store, top_k=2)

# Define QA Template
qa_template = PromptTemplate(
    prompt="""
    Hi, I'm Mprofier, your friendly AI assistant. I'm here to provide direct and concise
    answers to your specific questions. I won't ask any follow-up questions myself.
    If I can't find the answer in the provided context, I'll simply state that I don't
    have enough information to answer.
    Context: {join(documents)};
    Question: {query}
    Answer:
    """,
    output_parser=AnswerParser(),
)

# Get Hugging Face token (os.getenv is a function, not a mapping)
HF_TOKEN = os.getenv("HF_TOKEN")

# Initialize Prompt Node
prompt_node = PromptNode(
    model_name_or_path="mistralai/Mixtral-8x7B-Instruct-v0.1",
    api_key=HF_TOKEN,
    default_prompt_template=qa_template,
    max_length=500,
    model_kwargs={"model_max_length": 5000},
)

# Build Pipeline
rag_pipeline = Pipeline()
rag_pipeline.add_node(component=retriever, name="retriever", inputs=["Query"])
rag_pipeline.add_node(component=prompt_node, name="prompt_node", inputs=["retriever"])


# Chainlit function for handling messages
@cl.on_message
async def handle_message(message: cl.Message):
    query_text = message.content  # Extract the text content from the message
    response = await cl.make_async(rag_pipeline.run)(query=query_text)
    answer = response["answers"][0].answer if response["answers"] else "No answer found."
    await cl.Message(author="Mprofier", content=answer).send()


# Start the Chainlit application from the command line (there is no cl.run() entry point;
# Chainlit apps are launched with the `chainlit run` CLI), e.g.:
#   chainlit run app.py      # replace app.py with this file's name
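
# Optional sanity check (an illustrative snippet, not part of the original script):
# the RAG pipeline built above can be exercised directly from a Python shell before
# involving Chainlit. The query string below is hypothetical.
#
#   result = rag_pipeline.run(query="Who is Mprofy?")
#   print(result["answers"][0].answer)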