vishwask committed on
Commit
279b0d5
·
verified ·
1 Parent(s): 3886089

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -11
app.py CHANGED
@@ -79,7 +79,7 @@ def load_db():
79
 
80
 
81
  # Initialize langchain LLM chain
82
- def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, progress=gr.Progress()):
83
  llm = HuggingFaceHub(repo_id=llm_model, model_kwargs={"temperature":
84
  temperature, "max_new_tokens":
85
  max_tokens, "top_k": top_k,
@@ -111,7 +111,6 @@ def initialize_database(list_file_obj, chunk_size, chunk_overlap):
111
  # Create list of documents (when valid)
112
  list_file_path = [x.name for x in list_file_obj if x is not None]
113
  # Create collection_name for vector database
114
- progress(0.1, desc="Creating collection name...")
115
  collection_name = Path(list_file_path[0]).stem
116
  # Fix potential issues from naming convention
117
  ## Remove space
@@ -125,23 +124,20 @@ def initialize_database(list_file_obj, chunk_size, chunk_overlap):
125
  collection_name[-1] = 'Z'
126
  # print('list_file_path: ', list_file_path)
127
  print('Collection name: ', collection_name)
128
- progress(0.25, desc="Loading document...")
129
  # Load document and create splits
130
  doc_splits = load_doc(list_file_path, chunk_size, chunk_overlap)
131
  # Create or load vector database
132
- progress(0.5, desc="Generating vector database...")
133
  # global vector_db
134
  vector_db = create_db(doc_splits, collection_name)
135
- progress(0.9, desc="Done!")
136
  return vector_db, collection_name
137
 
138
 
139
def initialize_LLM(llm_option, llm_temperature, max_tokens, top_k, vector_db, progress=gr.Progress()):
    """Build a QA chain for the LLM selected in the UI.

    Maps the radio-button index to a model name via ``list_llm``, delegates
    chain construction to ``initialize_llmchain``, and returns the chain
    together with a status string for the Gradio status textbox.
    """
    # Resolve the repo id for the option index picked in the interface.
    llm_name = list_llm[llm_option]
    print("llm_name: ", llm_name)
    chain = initialize_llmchain(llm_name, llm_temperature, max_tokens, top_k, vector_db, progress)
    return chain, "Complete!"
145
 
146
 
147
  def format_chat_history(message, chat_history):
@@ -175,7 +171,7 @@ def conversation(qa_chain, message, history):
175
  # Append user message and response to chat history
176
  new_history = history + [(message, response_answer)]
177
  # return gr.update(value=""), new_history, response_sources[0], response_sources[1]
178
- return qa_chain, gr.update(value=""), new_history, response_source1, response_source1_page, response_source2, response_source2_page, response_source3, response_source3_page
179
 
180
 
181
  def upload_file(file_obj):
@@ -194,11 +190,10 @@ def demo():
194
  qa_chain = gr.State()
195
  collection_name = gr.State()
196
 
197
- document = gr.Files(value = ['/home/user/app/pdfs/Annual-Report-2022-2023-English_1.pdf'],visible=False,
198
- height=100, file_count="multiple", file_types=["pdf"], label="Upload your PDF documents (single or multiple)")
199
  chatbot = gr.Chatbot(height=300)
200
  db_btn = gr.Radio(["ChromaDB"], label="Vector database type", value = "ChromaDB", type="index", info="Choose your vector database", visible=False)
201
- with gr.Accordion("Advanced - Document references", open=False):
202
  with gr.Row():
203
  doc_source1 = gr.Textbox(label="Reference 1", lines=2, container=True, scale=20)
204
  source1_page = gr.Number(label="Page", scale=1)
 
79
 
80
 
81
  # Initialize langchain LLM chain
82
+ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db):
83
  llm = HuggingFaceHub(repo_id=llm_model, model_kwargs={"temperature":
84
  temperature, "max_new_tokens":
85
  max_tokens, "top_k": top_k,
 
111
  # Create list of documents (when valid)
112
  list_file_path = [x.name for x in list_file_obj if x is not None]
113
  # Create collection_name for vector database
 
114
  collection_name = Path(list_file_path[0]).stem
115
  # Fix potential issues from naming convention
116
  ## Remove space
 
124
  collection_name[-1] = 'Z'
125
  # print('list_file_path: ', list_file_path)
126
  print('Collection name: ', collection_name)
 
127
  # Load document and create splits
128
  doc_splits = load_doc(list_file_path, chunk_size, chunk_overlap)
129
  # Create or load vector database
 
130
  # global vector_db
131
  vector_db = create_db(doc_splits, collection_name)
 
132
  return vector_db, collection_name
133
 
134
 
135
def initialize_LLM(llm_option, llm_temperature, max_tokens, top_k, vector_db):
    """Build a QA chain for the LLM selected in the UI.

    Maps the radio-button index to a model name via ``list_llm`` and
    delegates chain construction to ``initialize_llmchain``.
    Returns the constructed QA chain.
    """
    # Resolve the repo id for the option index picked in the interface.
    llm_name = list_llm[llm_option]
    print("llm_name: ", llm_name)
    # Fix: this commit removed `progress` from both this signature and
    # initialize_llmchain's, but the call still forwarded the now-undefined
    # name `progress`, raising NameError on every invocation.
    qa_chain = initialize_llmchain(llm_name, llm_temperature, max_tokens, top_k, vector_db)
    return qa_chain
141
 
142
 
143
  def format_chat_history(message, chat_history):
 
171
  # Append user message and response to chat history
172
  new_history = history + [(message, response_answer)]
173
  # return gr.update(value=""), new_history, response_sources[0], response_sources[1]
174
+ return qa_chain, new_history, response_source1, response_source1_page, response_source2, response_source2_page, response_source3, response_source3_page
175
 
176
 
177
  def upload_file(file_obj):
 
190
  qa_chain = gr.State()
191
  collection_name = gr.State()
192
 
193
+ document = gr.Files(value = os.listdir('/home/user/app/pdfs/'),visible=False,height=100, file_count="multiple", file_types=["pdf"])
 
194
  chatbot = gr.Chatbot(height=300)
195
  db_btn = gr.Radio(["ChromaDB"], label="Vector database type", value = "ChromaDB", type="index", info="Choose your vector database", visible=False)
196
+ with gr.Accordion("Document references", open=False):
197
  with gr.Row():
198
  doc_source1 = gr.Textbox(label="Reference 1", lines=2, container=True, scale=20)
199
  source1_page = gr.Number(label="Page", scale=1)