Update app.py
app.py
CHANGED
@@ -12,8 +12,10 @@ from langchain.memory import ConversationBufferMemory
 from langchain.llms import HuggingFaceHub
 
 
-
+VECTOR_DIR = './chroma_HF/'
 MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.2"
+CHUNK_SIZE = 600
+CHUNK_OVERLAP = 40
 
 # Load PDF document and create doc splits
 def load_doc(list_file_path, chunk_size, chunk_overlap):
@@ -43,7 +45,7 @@ def create_db(splits):
 def load_db():
     embedding = HuggingFaceEmbeddings()
     vectordb = Chroma(
-        persist_directory=
+        persist_directory=VECTOR_DIR,
         embedding_function=embedding,
     )
     return vectordb
@@ -158,13 +160,6 @@ def demo():
         with gr.Tab("Step 1 - Document pre-processing"):
             with gr.Row():
                 document = gr.Files(height=100, file_count="multiple", file_types=["pdf"], interactive=True, label="Upload your PDF documents (single or multiple)")
-            with gr.Row():
-                db_btn = gr.Radio(["ChromaDB"], label="Vector database type", value = "ChromaDB", type="index", info="Choose your vector database")
-            with gr.Accordion("Advanced options - Document text splitter", open=False):
-                with gr.Row():
-                    slider_chunk_size = gr.Slider(minimum = 100, maximum = 1000, value=600, step=20, label="Chunk size", info="Chunk size", interactive=True)
-                with gr.Row():
-                    slider_chunk_overlap = gr.Slider(minimum = 10, maximum = 200, value=40, step=10, label="Chunk overlap", info="Chunk overlap", interactive=True)
             with gr.Row():
                 db_progress = gr.Textbox(label="Vector database initialization", value="None")
             with gr.Row():
@@ -198,7 +193,7 @@ def demo():
         # Preprocessing events
         #upload_btn.upload(upload_file, inputs=[upload_btn], outputs=[document])
         db_btn.click(initialize_database, \
-            inputs=[document,
+            inputs=[document, CHUNK_SIZE, CHUNK_OVERLAP], \
             outputs=[vector_db, db_progress])
         qachain_btn.click(initialize_LLM, \
             inputs=[slider_temperature, slider_maxtokens, slider_topk, vector_db], \
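The new VECTOR_DIR constant is the directory Chroma persists its index to and reads it back from in load_db(). Below is a minimal sketch of that round trip, assuming the classic langchain imports the app already relies on; the body of create_db() shown here is illustrative, not the app's actual implementation.

# Sketch only: write a Chroma index to VECTOR_DIR, then re-open it later.
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma

VECTOR_DIR = './chroma_HF/'

def create_db(splits):
    # Illustrative body: embed the document splits and persist them to VECTOR_DIR.
    embedding = HuggingFaceEmbeddings()
    vectordb = Chroma.from_documents(
        documents=splits,
        embedding=embedding,
        persist_directory=VECTOR_DIR,
    )
    vectordb.persist()
    return vectordb

def load_db():
    # Mirrors the patched load_db() in the diff: re-open the persisted index.
    embedding = HuggingFaceEmbeddings()
    vectordb = Chroma(
        persist_directory=VECTOR_DIR,
        embedding_function=embedding,
    )
    return vectordb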
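On the event side, Gradio passes the value of each component listed in inputs to the callback, so plain module-level constants are usually forwarded by wrapping them in gr.State. The following is a minimal, self-contained sketch of that wiring; the signature and body of initialize_database() are assumptions based only on the arguments visible in the diff.

# Sketch only: forward fixed chunking parameters to a click handler via gr.State.
import gradio as gr

CHUNK_SIZE = 600
CHUNK_OVERLAP = 40

def initialize_database(list_file_obj, chunk_size, chunk_overlap):
    # Placeholder body: the real app builds the Chroma index here.
    status = (f"Would index {len(list_file_obj or [])} file(s) "
              f"with chunk_size={chunk_size}, chunk_overlap={chunk_overlap}")
    return None, status

with gr.Blocks() as demo:
    document = gr.Files(file_count="multiple", file_types=["pdf"], label="Upload your PDF documents (single or multiple)")
    db_progress = gr.Textbox(label="Vector database initialization", value="None")
    vector_db = gr.State()
    db_btn = gr.Button("Generate vector database")
    db_btn.click(
        initialize_database,
        inputs=[document, gr.State(CHUNK_SIZE), gr.State(CHUNK_OVERLAP)],
        outputs=[vector_db, db_progress],
    )

demo.launch()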