Maxx0 committed on
Commit
07778e8
·
1 Parent(s): 32a13b5

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +86 -0
app.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Import required libraries
2
+ import PyPDF2
3
+ from getpass import getpass
4
+ from haystack.nodes import PreProcessor, PromptModel, PromptTemplate, PromptNode, AnswerParser
5
+ from haystack.document_stores import InMemoryDocumentStore
6
+ from haystack import Document, Pipeline
7
+ from haystack.nodes import BM25Retriever
8
+ from pprint import pprint
9
+ import streamlit as st
10
+ import logging
11
+ from dotenv import load_dotenv
12
+ load_dotenv()
13
+ import os
14
+ import logging
15
+ from key import HF_KEY
16
+ logging.basicConfig(level=logging.DEBUG)
17
+
18
+ # Function to extract text from a PDF
19
+ def extract_text_from_pdf(pdf_path):
20
+ text = ""
21
+ with open(pdf_path, "rb") as pdf_file:
22
+ pdf_reader = PyPDF2.PdfReader(pdf_file)
23
+ for page_num in range(len(pdf_reader.pages)):
24
+ page = pdf_reader.pages[page_num]
25
+ text += page.extract_text() or ""
26
+ return text
27
+
28
+ # Extract text from the PDF file
29
+ pdf_file_path = "Data/MR. MPROFY.pdf"
30
+ pdf_text = extract_text_from_pdf(pdf_file_path)
31
+ if not pdf_text:
32
+ raise ValueError("No text extracted from PDF.")
33
+
34
+ # Create a Haystack document
35
+ doc = Document(content=pdf_text, meta={"name": "MR. MPROFY"})
36
+
37
+ # Initialize Document Store
38
+ document_store = InMemoryDocumentStore(use_bm25=True)
39
+ document_store.write_documents([doc])
40
+
41
+ # Initialize Retriever
42
+ retriever = BM25Retriever(document_store=document_store, top_k=2)
43
+
44
+ # Define QA Template
45
+ qa_template = PromptTemplate(
46
+ prompt="""
47
+ Hi, I'm Mprofier, your friendly AI assistant. I'm here to provide direct and concise answers to your specific questions.
48
+ I won’t ask any follow-up questions myself.
49
+ If I can't find the answer in the provided context, I'll simply state that I don't have enough information to answer.
50
+ Context: {join(documents)};
51
+ Question: {query}
52
+ Answer:
53
+ """,
54
+ output_parser=AnswerParser()
55
+ )
56
+
57
+ # Get Huggingface token
58
+ HF_TOKEN = os.getenv('HF_KEY')
59
+
60
+ # Initialize Prompt Node
61
+ prompt_node = PromptNode(
62
+ model_name_or_path="mistralai/Mixtral-8x7B-Instruct-v0.1",
63
+ api_key=HF_TOKEN,
64
+ default_prompt_template=qa_template,
65
+ max_length=500,
66
+ model_kwargs={"model_max_length": 5000}
67
+ )
68
+
69
+ # Build Pipeline
70
+ rag_pipeline = Pipeline()
71
+ rag_pipeline.add_node(component=retriever, name="retriever", inputs=["Query"])
72
+ rag_pipeline.add_node(component=prompt_node, name="prompt_node", inputs=["retriever"])
73
+
74
+ # Streamlit Function for Handling Input and Displaying Output
75
+ def run_streamlit_app():
76
+ st.title("Mprofier - AI Assistant")
77
+ query_text = st.text_input("Enter your question:")
78
+
79
+ if st.button("Get Answer"):
80
+ response = rag_pipeline.run(query=query_text)
81
+ answer = response["answers"][0].answer if response["answers"] else "No answer found."
82
+ st.write(answer)
83
+
84
+ # Start the Streamlit application
85
+ if __name__ == "__main__":
86
+ run_streamlit_app()