Update app.py

app.py CHANGED
@@ -5,53 +5,48 @@ from langchain.embeddings import HuggingFaceEmbeddings
 from langchain.vectorstores import FAISS
 from langchain.llms import HuggingFaceHub
 from langchain.chains import ConversationalRetrievalChain
+from unstructured.documents import from_pdf
+import camelot
+from pathlib import Path
 
 # Load the HuggingFace language model and embeddings
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-
-# Initialize the embeddings model for document retrieval
 embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
 
-# Initialize vector_store and retriever as None initially
 vector_store = None
 retriever = None
 
+def extract_text_from_pdf(filepath):
+    # Use unstructured to read text from the PDF
+    documents = from_pdf(filepath)
+    return "\n".join([doc.text for doc in documents])
+
+def extract_tables_from_pdf(filepath):
+    # Use camelot to read tables from the PDF
+    tables = camelot.read_pdf(filepath, pages='1-end')
+    return [table.df.to_string(index=False) for table in tables]
+
 def update_documents(text_input):
     global vector_store, retriever
-    # Split the input text into individual documents based on newlines or other delimiters
     documents = text_input.split("\n")
-
-    # Update the FAISS vector store with new documents
     vector_store = FAISS.from_texts(documents, embeddings)
-
-    # Set the retriever to use the new vector store
     retriever = vector_store.as_retriever()
     return f"{len(documents)} documents successfully added to the vector store."
 
-# Set up ConversationalRetrievalChain
 rag_chain = None
 
-def respond(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
+def respond(message, history, system_message, max_tokens, temperature, top_p):
     global rag_chain, retriever
 
     if retriever is None:
         return "Please upload or enter documents before asking a question."
 
-    # Create the chain if it hasn't been initialized
     if rag_chain is None:
         rag_chain = ConversationalRetrievalChain.from_llm(
             HuggingFaceHub(repo_id="HuggingFaceH4/zephyr-7b-beta"),
             retriever=retriever
         )
 
-    # Combine history with the user message
     conversation_history = [{"role": "system", "content": system_message}]
 
     for val in history:
@@ -62,35 +57,29 @@ def respond(
 
     conversation_history.append({"role": "user", "content": message})
 
-    # Retrieve documents and generate response
     response = rag_chain({"question": message, "chat_history": history})
-
-    # Return the model's response
     return response['answer']
 
 def upload_file(filepath):
-    …
+    text = extract_text_from_pdf(filepath)
+    tables = extract_tables_from_pdf(filepath)
+
+    # Update documents in the vector store
+    update_documents(text)
+
+    return [gr.UploadButton(visible=False), gr.DownloadButton(label=f"Download {Path(filepath).name}", value=filepath, visible=True), f"{len(tables)} tables extracted."]
 
 # Gradio interface setup
 demo = gr.Blocks()
 
 with demo:
     with gr.Row():
-        …
-        u.upload(upload_file, u, [u, d])
-        d.click(download_file, None, [u, d])
-
+        u = gr.UploadButton("Upload a file", file_count="single")
+        d = gr.DownloadButton("Download the file", visible=False)
+
+        u.upload(upload_file, u, [u, d, "status"])
 
     with gr.Row():
-        # Chat interface for the RAG system
         chat = gr.ChatInterface(
             respond,
             additional_inputs=[
@@ -101,8 +90,5 @@ with demo:
             ],
         )
 
-    # Bind button to update the document vector store
-    # upload_button.click(update_documents, inputs=[doc_input], outputs=gr.Textbox(label="Status"))
-
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch()
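`camelot.read_pdf(filepath, pages='1-end')` is valid Camelot usage, but the default "lattice" flavor requires Ghostscript and only detects tables drawn with ruling lines, and Camelot cannot read scanned PDFs at all. A sketch that falls back to the "stream" flavor when nothing is detected:

import camelot

def extract_tables_from_pdf(filepath):
    # "lattice" (the default) needs Ghostscript and visible table borders
    tables = camelot.read_pdf(filepath, pages="1-end")
    if tables.n == 0:
        # Fall back to whitespace-based detection for borderless tables
        tables = camelot.read_pdf(filepath, pages="1-end", flavor="stream")
    return [t.df.to_string(index=False) for t in tables]

Note also that `upload_file` reports how many tables were extracted but never indexes them: only `text` reaches `update_documents`, so table content stays invisible to the retriever.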
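In `respond`, `conversation_history` (and with it `system_message`) is assembled but never used, and `max_tokens`, `temperature`, and `top_p` never reach the model: `ConversationalRetrievalChain` consumes only `question` and `chat_history` (Gradio's tuple-style history already has the (user, assistant) shape the chain's default history formatter accepts). One way to route the sampling controls through, assuming the `HuggingFaceHub` wrapper's `model_kwargs` parameter:

if rag_chain is None:
    llm = HuggingFaceHub(
        repo_id="HuggingFaceH4/zephyr-7b-beta",
        # The chain exposes no sampling inputs of its own, so configure the LLM
        model_kwargs={"max_new_tokens": max_tokens, "temperature": temperature, "top_p": top_p},
    )
    rag_chain = ConversationalRetrievalChain.from_llm(llm, retriever=retriever)

The module-level `client = InferenceClient(...)` is likewise never used in the code shown; the chain talks to the Hub through `HuggingFaceHub`.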
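`u.upload(upload_file, u, [u, d, "status"])` passes the string "status" as the third output, but Gradio event outputs must be component instances, and no component exists to receive the "N tables extracted." message that `upload_file` returns. A sketch with a hypothetical `status` textbox (not part of the commit):

with demo:
    with gr.Row():
        u = gr.UploadButton("Upload a file", file_count="single")
        d = gr.DownloadButton("Download the file", visible=False)
        status = gr.Textbox(label="Status", interactive=False)  # hypothetical component

        # Three outputs, matching upload_file's three return values
        u.upload(upload_file, u, [u, d, status])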