# testprofytest / app.py
# NOTE: the lines below are scrape residue from the Hugging Face Space file
# page (not Python code), preserved here as comments:
#   author: Maxx0 — commit "Create app.py" (b47616b) — raw / history / blame — 2.78 kB
# Import required libraries
# Standard library
import logging
import os
from getpass import getpass  # kept from original; may be used elsewhere
from pprint import pprint    # kept from original; may be used elsewhere

# Third-party
import PyPDF2
import chainlit as cl
from dotenv import load_dotenv
from haystack import Document, Pipeline
from haystack.document_stores import InMemoryDocumentStore
from haystack.nodes import (
    AnswerParser,
    BM25Retriever,
    PreProcessor,
    PromptModel,
    PromptNode,
    PromptTemplate,
)

# Load variables from a local .env file (e.g. HF_TOKEN) before they are read.
load_dotenv()

# DEBUG-level logging for the whole app; noisy but useful while developing.
# (The original imported ``logging`` twice; the duplicate is removed.)
logging.basicConfig(level=logging.DEBUG)
# Function to extract text from a PDF
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page in the PDF at *pdf_path*.

    Pages whose ``extract_text()`` returns ``None`` (e.g. image-only pages)
    contribute an empty string, so the result is always a ``str``.
    """
    with open(pdf_path, "rb") as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        # str.join over a generator is linear in total length, unlike
        # repeated ``text +=`` which can degrade quadratically.
        return "".join(page.extract_text() or "" for page in pdf_reader.pages)
# --- Index the source PDF into an in-memory BM25 store ---
pdf_file_path = "Data/MR. MPROFY.pdf"
pdf_text = extract_text_from_pdf(pdf_file_path)
if not pdf_text:
    raise ValueError("No text extracted from PDF.")

# Wrap the raw text in a single Haystack Document.
doc = Document(content=pdf_text, meta={"name": "MR. MPROFY"})

# In-memory store with BM25 enabled so the retriever below can rank by it.
document_store = InMemoryDocumentStore(use_bm25=True)
document_store.write_documents([doc])

# Sparse retriever: return the 2 best-matching documents per query.
retriever = BM25Retriever(document_store=document_store, top_k=2)
# Define QA Template
# Prompt fed to the LLM: ``{join(documents)}`` inlines the retrieved context
# and ``{query}`` the user's question; AnswerParser pulls the generated
# answer out of the model response so the pipeline returns Answer objects.
qa_template = PromptTemplate(
prompt="""
Hi, I'm Mprofier, your friendly AI assistant. I'm here to provide direct and concise answers to your specific questions.
I won’t ask any follow-up questions myself.
If I can't find the answer in the provided context, I'll simply state that I don't have enough information to answer.
Context: {join(documents)};
Question: {query}
Answer:
""",
output_parser=AnswerParser()
)
# Get Huggingface token from the environment (loaded earlier via .env).
# BUG FIX: ``os.getenv`` is a function, not a mapping — the original
# ``os.getenv['HF_TOKEN']`` raised TypeError at import time.
HF_TOKEN = os.getenv("HF_TOKEN")

# Initialize Prompt Node: Mixtral served through the HF inference API.
prompt_node = PromptNode(
    model_name_or_path="mistralai/Mixtral-8x7B-Instruct-v0.1",
    api_key=HF_TOKEN,
    default_prompt_template=qa_template,
    max_length=500,  # cap on generated tokens
    model_kwargs={"model_max_length": 5000},  # context-window limit passed to the model
)
# Build Pipeline
# Query -> BM25 retriever -> prompt node; the retrieved documents feed the
# ``{join(documents)}`` slot of the QA template above.
rag_pipeline = Pipeline()
rag_pipeline.add_node(component=retriever, name="retriever", inputs=["Query"])
rag_pipeline.add_node(component=prompt_node, name="prompt_node", inputs=["retriever"])
# Chainlit Function for Handling Messages
@cl.on_message
async def handle_message(message: cl.Message):
    """Run the RAG pipeline on an incoming chat message and send the answer."""
    query_text = message.content  # raw text the user typed
    # Pipeline.run is blocking; cl.make_async moves it off the event loop.
    response = await cl.make_async(rag_pipeline.run)(query=query_text)
    answers = response["answers"]
    answer = answers[0].answer if answers else "No answer found."
    await cl.Message(author="Mprofier", content=answer).send()
# Start the Chainlit application
if __name__ == "__main__":
    # NOTE(review): Chainlit apps are normally launched with
    # ``chainlit run app.py``; a public ``cl.run()`` is not part of the
    # documented API — confirm this entry point actually works.
    cl.run()