Mattral committed on
Commit 6e04d14 · verified · 1 Parent(s): a431be9

Create app.py

Files changed (1)
  1. app.py +87 -0
app.py ADDED
@@ -0,0 +1,87 @@
+ import streamlit as st
+ import os
+ from huggingface_hub import InferenceClient
+ from langchain_community.document_loaders import PDFPlumberLoader
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
+ from langchain_core.vectorstores import InMemoryVectorStore
+ from langchain_core.prompts import ChatPromptTemplate
+ from langchain_community.embeddings import HuggingFaceEmbeddings  # embedding backend for the vector store
+
+ # Set up the Hugging Face model and token
+ model = "mistralai/Mixtral-8x7B-Instruct-v0.1"  # swap in any instruct model of your choice from Hugging Face
+ access_token = os.getenv("HF_TOKEN")  # your Hugging Face API token
+ client = InferenceClient(model=model, token=access_token)
+
+ # Prompt template for response generation
+ template = """
+ You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
+ Question: {question}
+ Context: {context}
+ Answer:
+ """
+
+ # Directory to store uploaded PDFs
+ pdfs_directory = '../pdfs'
+ os.makedirs(pdfs_directory, exist_ok=True)
+
+ # Initialize the vector store for document indexing.
+ # InMemoryVectorStore requires an embedding model; a small
+ # sentence-transformers model is assumed here (any LangChain
+ # Embeddings implementation will do).
+ embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+ vector_store = InMemoryVectorStore(embedding=embeddings)
+
+ # Save an uploaded PDF file to disk
+ def upload_pdf(file):
+     # os.path.join avoids the missing path separator that plain
+     # string concatenation would produce
+     with open(os.path.join(pdfs_directory, file.name), "wb") as f:
+         f.write(file.getbuffer())
+
+ # Load PDF content into LangChain documents
+ def load_pdf(file_path):
+     loader = PDFPlumberLoader(file_path)
+     documents = loader.load()
+     return documents
+
+ # Split text into manageable, overlapping chunks
+ def split_text(documents):
+     text_splitter = RecursiveCharacterTextSplitter(
+         chunk_size=1000,
+         chunk_overlap=200,
+         add_start_index=True
+     )
+     return text_splitter.split_documents(documents)
+
+ # Index document chunks in the vector store
+ def index_docs(documents):
+     vector_store.add_documents(documents)
+
+ # Retrieve the documents most relevant to a query
+ def retrieve_docs(query):
+     return vector_store.similarity_search(query)
+
+ # Generate an answer based on the retrieved documents
+ def answer_question(question, documents):
+     context = "\n\n".join(doc.page_content for doc in documents)
+     prompt = ChatPromptTemplate.from_template(template)
+     # InferenceClient is not a LangChain Runnable, so it cannot be piped
+     # onto the prompt with |; format the prompt and call the client's
+     # text-generation endpoint directly instead
+     return client.text_generation(
+         prompt.format(question=question, context=context),
+         max_new_tokens=256,
+     )
+
+ # Streamlit file uploader for a single PDF
+ uploaded_file = st.file_uploader(
+     "Upload PDF",
+     type="pdf",
+     accept_multiple_files=False
+ )
+
+ if uploaded_file:
+     # Save, load, split, and index the document
+     upload_pdf(uploaded_file)
+     documents = load_pdf(os.path.join(pdfs_directory, uploaded_file.name))
+     chunked_documents = split_text(documents)
+     index_docs(chunked_documents)
+
+     # User input for a question
+     question = st.chat_input()
+
+     if question:
+         st.chat_message("user").write(question)
+         related_documents = retrieve_docs(question)
+         answer = answer_question(question, related_documents)
+         st.chat_message("assistant").write(answer)