Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -29,6 +29,7 @@ list_llm = [
|
|
29 |
]
|
30 |
list_llm_simple = [os.path.basename(llm) for llm in list_llm]
|
31 |
|
|
|
32 |
@spaces.GPU
|
33 |
def load_doc(list_file_path, chunk_size, chunk_overlap):
|
34 |
loaders = [PyPDFLoader(x) for x in list_file_path]
|
@@ -36,15 +37,18 @@ def load_doc(list_file_path, chunk_size, chunk_overlap):
|
|
36 |
for loader in loaders:
|
37 |
pages.extend(loader.load())
|
38 |
text_splitter = RecursiveCharacterTextSplitter(
|
39 |
-
chunk_size=chunk_size,
|
40 |
-
chunk_overlap=chunk_overlap)
|
41 |
doc_splits = text_splitter.split_documents(pages)
|
42 |
return doc_splits
|
43 |
|
44 |
# Create vector database
|
45 |
def create_db(splits, collection_name):
|
46 |
-
|
47 |
-
|
|
|
|
|
|
|
48 |
new_client = chromadb.EphemeralClient()
|
49 |
vectordb = Chroma.from_documents(
|
50 |
documents=splits,
|
|
|
29 |
]
|
30 |
list_llm_simple = [os.path.basename(llm) for llm in list_llm]
|
31 |
|
32 |
+
@spaces.GPU
|
33 |
@spaces.GPU
|
34 |
def load_doc(list_file_path, chunk_size, chunk_overlap):
|
35 |
loaders = [PyPDFLoader(x) for x in list_file_path]
|
|
|
37 |
for loader in loaders:
|
38 |
pages.extend(loader.load())
|
39 |
text_splitter = RecursiveCharacterTextSplitter(
|
40 |
+
chunk_size = chunk_size,
|
41 |
+
chunk_overlap = chunk_overlap)
|
42 |
doc_splits = text_splitter.split_documents(pages)
|
43 |
return doc_splits
|
44 |
|
45 |
# Create vector database
|
46 |
def create_db(splits, collection_name):
|
47 |
+
# Set CUDA_VISIBLE_DEVICES if GPU is available
|
48 |
+
if torch.cuda.is_available():
|
49 |
+
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
|
50 |
+
|
51 |
+
embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
|
52 |
new_client = chromadb.EphemeralClient()
|
53 |
vectordb = Chroma.from_documents(
|
54 |
documents=splits,
|