raj999 committed on
Commit 27a6371 · verified · 1 Parent(s): b3ae10a

Update app.py

Files changed (1)
  1. app.py +26 -40
app.py CHANGED
@@ -5,53 +5,48 @@ from langchain.embeddings import HuggingFaceEmbeddings
 from langchain.vectorstores import FAISS
 from langchain.llms import HuggingFaceHub
 from langchain.chains import ConversationalRetrievalChain
+from unstructured.documents import from_pdf
+import camelot
+from pathlib import Path
 
 # Load the HuggingFace language model and embeddings
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-
-# Initialize the embeddings model for document retrieval
 embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
 
-# Initialize vector_store and retriever as None initially
 vector_store = None
 retriever = None
 
+def extract_text_from_pdf(filepath):
+    # Use unstructured to read text from the PDF
+    documents = from_pdf(filepath)
+    return "\n".join([doc.text for doc in documents])
+
+def extract_tables_from_pdf(filepath):
+    # Use camelot to read tables from the PDF
+    tables = camelot.read_pdf(filepath, pages='1-end')
+    return [table.df.to_string(index=False) for table in tables]
+
 def update_documents(text_input):
     global vector_store, retriever
-    # Split the input text into individual documents based on newlines or other delimiters
     documents = text_input.split("\n")
-
-    # Update the FAISS vector store with new documents
     vector_store = FAISS.from_texts(documents, embeddings)
-
-    # Set the retriever to use the new vector store
     retriever = vector_store.as_retriever()
     return f"{len(documents)} documents successfully added to the vector store."
 
-# Set up ConversationalRetrievalChain
 rag_chain = None
 
-def respond(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
+def respond(message, history, system_message, max_tokens, temperature, top_p):
     global rag_chain, retriever
 
     if retriever is None:
         return "Please upload or enter documents before asking a question."
 
-    # Create the chain if it hasn't been initialized
     if rag_chain is None:
         rag_chain = ConversationalRetrievalChain.from_llm(
             HuggingFaceHub(repo_id="HuggingFaceH4/zephyr-7b-beta"),
             retriever=retriever
         )
 
-    # Combine history with the user message
     conversation_history = [{"role": "system", "content": system_message}]
 
     for val in history:
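
As a point of comparison, the two ingestion helpers added in this hunk can also be written against unstructured's documented partition_pdf entry point; the sketch below is illustrative only (it assumes unstructured installed with its pdf extra plus camelot-py, and the file path is hypothetical), not the code in this commit.

# Sketch: PDF ingestion helpers (assumptions: "unstructured[pdf]" and
# "camelot-py" are installed; the sample path is illustrative).
from unstructured.partition.pdf import partition_pdf
import camelot

def extract_text(filepath):
    # partition_pdf returns a list of Element objects; each exposes .text
    elements = partition_pdf(filename=filepath)
    return "\n".join(el.text for el in elements if el.text)

def extract_tables(filepath):
    # camelot returns a TableList; each table wraps a pandas DataFrame in .df
    tables = camelot.read_pdf(filepath, pages="all")
    return [t.df.to_string(index=False) for t in tables]

# Example (hypothetical file):
# print(extract_text("sample.pdf")[:300])
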
@@ -62,35 +57,29 @@ def respond(
 
     conversation_history.append({"role": "user", "content": message})
 
-    # Retrieve documents and generate response
     response = rag_chain({"question": message, "chat_history": history})
-
-    # Return the model's response
     return response['answer']
 
 def upload_file(filepath):
-    name = Path(filepath).name
-    return [gr.UploadButton(visible=False), gr.DownloadButton(label=f"Download {name}", value=filepath, visible=True)]
-
-def download_file():
-    return [gr.UploadButton(visible=True), gr.DownloadButton(visible=False)]
+    text = extract_text_from_pdf(filepath)
+    tables = extract_tables_from_pdf(filepath)
+
+    # Update documents in the vector store
+    update_documents(text)
+
+    return [gr.UploadButton(visible=False), gr.DownloadButton(label=f"Download {Path(filepath).name}", value=filepath, visible=True), f"{len(tables)} tables extracted."]
 
 # Gradio interface setup
 demo = gr.Blocks()
 
 with demo:
     with gr.Row():
-        # upload_button = gr.Button("Upload Documents")
-        with gr.Row():
-            u = gr.UploadButton("Upload a file", file_count="single")
-            d = gr.DownloadButton("Download the file", visible=False)
-
-            u.upload(upload_file, u, [u, d])
-            d.click(download_file, None, [u, d])
-
+        u = gr.UploadButton("Upload a file", file_count="single")
+        d = gr.DownloadButton("Download the file", visible=False)
+
+        u.upload(upload_file, u, [u, d, "status"])
 
     with gr.Row():
-        # Chat interface for the RAG system
         chat = gr.ChatInterface(
             respond,
             additional_inputs=[
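
Gradio routes event outputs to component objects, so a textual status message normally gets its own component in the layout; below is a minimal sketch of that wiring (the handle_upload handler and the Status Textbox are illustrative names, not part of this commit).

# Sketch: UploadButton wiring with an explicit status component
# (assumption: Gradio 4.x with gr.UploadButton and gr.DownloadButton).
import gradio as gr
from pathlib import Path

def handle_upload(filepath):
    name = Path(filepath).name
    # One return value per output component, in order
    return (
        gr.UploadButton(visible=False),
        gr.DownloadButton(label=f"Download {name}", value=filepath, visible=True),
        f"Received {name}",
    )

with gr.Blocks() as demo:
    with gr.Row():
        u = gr.UploadButton("Upload a file", file_count="single")
        d = gr.DownloadButton("Download the file", visible=False)
        status = gr.Textbox(label="Status", interactive=False)
    u.upload(handle_upload, u, [u, d, status])

if __name__ == "__main__":
    demo.launch()
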
@@ -101,8 +90,5 @@ with demo:
             ],
         )
 
-    # Bind button to update the document vector store
-    # upload_button.click(update_documents, inputs=[doc_input], outputs=gr.Textbox(label="Status"))
-
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch()
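
The retrieval path itself follows a common LangChain pattern: build a FAISS index from raw texts, expose it as a retriever, and wrap it in a ConversationalRetrievalChain. A minimal end-to-end sketch using the same classes as app.py (the sample texts and question are illustrative, and a valid HUGGINGFACEHUB_API_TOKEN is assumed to be set in the environment):

# Sketch: FAISS + ConversationalRetrievalChain, as used in app.py
# (assumptions: legacy langchain imports as in this commit, and a
# HUGGINGFACEHUB_API_TOKEN available in the environment).
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import HuggingFaceHub
from langchain.chains import ConversationalRetrievalChain

texts = ["Paris is the capital of France.", "The Seine flows through Paris."]  # illustrative
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
vector_store = FAISS.from_texts(texts, embeddings)
retriever = vector_store.as_retriever()

chain = ConversationalRetrievalChain.from_llm(
    HuggingFaceHub(repo_id="HuggingFaceH4/zephyr-7b-beta"),
    retriever=retriever,
)

# chat_history is a list of (question, answer) tuples for this chain
result = chain({"question": "What is the capital of France?", "chat_history": []})
print(result["answer"])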
 