akshay326 committed on
Commit
2c96c1a
·
unverified ·
1 Parent(s): e5044dc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -10
app.py CHANGED
@@ -12,8 +12,10 @@ from langchain.memory import ConversationBufferMemory
12
  from langchain.llms import HuggingFaceHub
13
 
14
 
15
- default_persist_directory = './chroma_HF/'
16
  MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.2"
 
 
17
 
18
  # Load PDF document and create doc splits
19
  def load_doc(list_file_path, chunk_size, chunk_overlap):
@@ -43,7 +45,7 @@ def create_db(splits):
43
  def load_db():
44
  embedding = HuggingFaceEmbeddings()
45
  vectordb = Chroma(
46
- persist_directory=default_persist_directory,
47
  embedding_function=embedding,
48
  )
49
  return vectordb
@@ -158,13 +160,6 @@ def demo():
158
  with gr.Tab("Step 1 - Document pre-processing"):
159
  with gr.Row():
160
  document = gr.Files(height=100, file_count="multiple", file_types=["pdf"], interactive=True, label="Upload your PDF documents (single or multiple)")
161
- with gr.Row():
162
- db_btn = gr.Radio(["ChromaDB"], label="Vector database type", value = "ChromaDB", type="index", info="Choose your vector database")
163
- with gr.Accordion("Advanced options - Document text splitter", open=False):
164
- with gr.Row():
165
- slider_chunk_size = gr.Slider(minimum = 100, maximum = 1000, value=600, step=20, label="Chunk size", info="Chunk size", interactive=True)
166
- with gr.Row():
167
- slider_chunk_overlap = gr.Slider(minimum = 10, maximum = 200, value=40, step=10, label="Chunk overlap", info="Chunk overlap", interactive=True)
168
  with gr.Row():
169
  db_progress = gr.Textbox(label="Vector database initialization", value="None")
170
  with gr.Row():
@@ -198,7 +193,7 @@ def demo():
198
  # Preprocessing events
199
  #upload_btn.upload(upload_file, inputs=[upload_btn], outputs=[document])
200
  db_btn.click(initialize_database, \
201
- inputs=[document, slider_chunk_size, slider_chunk_overlap], \
202
  outputs=[vector_db, db_progress])
203
  qachain_btn.click(initialize_LLM, \
204
  inputs=[slider_temperature, slider_maxtokens, slider_topk, vector_db], \
 
12
  from langchain.llms import HuggingFaceHub
13
 
14
 
15
+ VECTOR_DIR = './chroma_HF/'
16
  MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.2"
17
+ CHUNK_SIZE = 600
18
+ CHUNK_OVERLAP = 40
19
 
20
  # Load PDF document and create doc splits
21
  def load_doc(list_file_path, chunk_size, chunk_overlap):
 
45
  def load_db():
46
  embedding = HuggingFaceEmbeddings()
47
  vectordb = Chroma(
48
+ persist_directory=VECTOR_DIR,
49
  embedding_function=embedding,
50
  )
51
  return vectordb
 
160
  with gr.Tab("Step 1 - Document pre-processing"):
161
  with gr.Row():
162
  document = gr.Files(height=100, file_count="multiple", file_types=["pdf"], interactive=True, label="Upload your PDF documents (single or multiple)")
 
 
 
 
 
 
 
163
  with gr.Row():
164
  db_progress = gr.Textbox(label="Vector database initialization", value="None")
165
  with gr.Row():
 
193
  # Preprocessing events
194
  #upload_btn.upload(upload_file, inputs=[upload_btn], outputs=[document])
195
  db_btn.click(initialize_database, \
196
+ inputs=[document, CHUNK_SIZE, CHUNK_OVERLAP], \
197
  outputs=[vector_db, db_progress])
198
  qachain_btn.click(initialize_LLM, \
199
  inputs=[slider_temperature, slider_maxtokens, slider_topk, vector_db], \