# testprofytest / app.py
# NOTE: the lines below are scrape residue from the Hugging Face Space file
# page (not Python code), preserved here as comments:
#   author: Maxx0 — commit "Create app.py" (b47616b) — raw / history / blame — 2.78 kB
# Import required libraries
# Standard library
import logging
import os
from getpass import getpass  # kept from original; may be used elsewhere
from pprint import pprint    # kept from original; may be used elsewhere

# Third-party
import PyPDF2
import chainlit as cl
from dotenv import load_dotenv
from haystack import Document, Pipeline
from haystack.document_stores import InMemoryDocumentStore
from haystack.nodes import (
    AnswerParser,
    BM25Retriever,
    PreProcessor,
    PromptModel,
    PromptNode,
    PromptTemplate,
)

# Load variables from a local .env file (e.g. HF_TOKEN) before they are read.
load_dotenv()

# DEBUG-level logging for the whole app; noisy but useful while developing.
# (The original imported ``logging`` twice; the duplicate is removed.)
logging.basicConfig(level=logging.DEBUG)
# Function to extract text from a PDF
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page in the PDF at *pdf_path*.

    Pages whose ``extract_text()`` returns ``None`` (e.g. image-only pages)
    contribute an empty string, so the result is always a ``str``.
    """
    with open(pdf_path, "rb") as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        # str.join over a generator is linear in total length, unlike
        # repeated ``text +=`` which can degrade quadratically.
        return "".join(page.extract_text() or "" for page in pdf_reader.pages)
# --- Index the source PDF into an in-memory BM25 store ---
pdf_file_path = "Data/MR. MPROFY.pdf"
pdf_text = extract_text_from_pdf(pdf_file_path)
if not pdf_text:
    raise ValueError("No text extracted from PDF.")

# Wrap the raw text in a single Haystack Document.
doc = Document(content=pdf_text, meta={"name": "MR. MPROFY"})

# In-memory store with BM25 enabled so the retriever below can rank by it.
document_store = InMemoryDocumentStore(use_bm25=True)
document_store.write_documents([doc])

# Sparse retriever: return the 2 best-matching documents per query.
retriever = BM25Retriever(document_store=document_store, top_k=2)
# Define QA Template
# Prompt fed to the LLM: ``{join(documents)}`` inlines the retrieved context
# and ``{query}`` the user's question; AnswerParser pulls the generated
# answer out of the model response so the pipeline returns Answer objects.
qa_template = PromptTemplate(
prompt="""
Hi, I'm Mprofier, your friendly AI assistant. I'm here to provide direct and concise answers to your specific questions.
I won’t ask any follow-up questions myself.
If I can't find the answer in the provided context, I'll simply state that I don't have enough information to answer.
Context: {join(documents)};
Question: {query}
Answer:
""",
output_parser=AnswerParser()
)
# Get Huggingface token from the environment (loaded earlier via .env).
# BUG FIX: ``os.getenv`` is a function, not a mapping — the original
# ``os.getenv['HF_TOKEN']`` raised TypeError at import time.
HF_TOKEN = os.getenv("HF_TOKEN")

# Initialize Prompt Node: Mixtral served through the HF inference API.
prompt_node = PromptNode(
    model_name_or_path="mistralai/Mixtral-8x7B-Instruct-v0.1",
    api_key=HF_TOKEN,
    default_prompt_template=qa_template,
    max_length=500,  # cap on generated tokens
    model_kwargs={"model_max_length": 5000},  # context-window limit passed to the model
)
# Build Pipeline
# Query -> BM25 retriever -> prompt node; the retrieved documents feed the
# ``{join(documents)}`` slot of the QA template above.
rag_pipeline = Pipeline()
rag_pipeline.add_node(component=retriever, name="retriever", inputs=["Query"])
rag_pipeline.add_node(component=prompt_node, name="prompt_node", inputs=["retriever"])
# Chainlit Function for Handling Messages
@cl.on_message
async def handle_message(message: cl.Message):
    """Run the RAG pipeline on an incoming chat message and send the answer."""
    query_text = message.content  # raw text the user typed
    # Pipeline.run is blocking; cl.make_async moves it off the event loop.
    response = await cl.make_async(rag_pipeline.run)(query=query_text)
    answers = response["answers"]
    answer = answers[0].answer if answers else "No answer found."
    await cl.Message(author="Mprofier", content=answer).send()
# Start the Chainlit application
if __name__ == "__main__":
    # NOTE(review): Chainlit apps are normally launched with
    # ``chainlit run app.py``; a public ``cl.run()`` is not part of the
    # documented API — confirm this entry point actually works.
    cl.run()