Spaces:
Sleeping
Sleeping
MohammedNasser
committed on
Commit
•
c2777d8
1
Parent(s):
1511464
Update app.py
Browse files
app.py
CHANGED
@@ -14,6 +14,10 @@ from gtts import gTTS
|
|
14 |
import sys
|
15 |
import pytesseract
|
16 |
from pdf2image import convert_from_path
|
|
|
|
|
|
|
|
|
17 |
|
18 |
|
19 |
# Load environment variables
|
@@ -41,6 +45,29 @@ def load_pdf(file_path):
|
|
41 |
documents.append(text)
|
42 |
return documents
|
43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
def prepare_vectorstore(data):
|
45 |
index_dir = "faiss_index"
|
46 |
if not os.path.exists(index_dir):
|
@@ -48,16 +75,21 @@ def prepare_vectorstore(data):
|
|
48 |
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=20, separator="\n")
|
49 |
texts = data
|
50 |
vectorstore = FAISS.from_texts(texts, embeddings)
|
51 |
-
vectorstore
|
52 |
-
|
53 |
return vectorstore
|
|
|
54 |
|
55 |
-
def load_vectorstore():
|
56 |
index_dir = "faiss_index"
|
57 |
|
58 |
-
# Ensure the directory exists
|
59 |
if not os.path.exists(index_dir):
|
60 |
-
|
|
|
|
|
|
|
|
|
|
|
61 |
|
62 |
vectorstore = FAISS.load_local(index_dir, embeddings, allow_dangerous_deserialization=True)
|
63 |
return vectorstore
|
|
|
14 |
import sys
|
15 |
import pytesseract
|
16 |
from pdf2image import convert_from_path
|
17 |
+
from huggingface_hub import Repository
|
18 |
+
from huggingface_hub import hf_hub_download
|
19 |
+
|
20 |
+
|
21 |
|
22 |
|
23 |
# Load environment variables
|
|
|
45 |
documents.append(text)
|
46 |
return documents
|
47 |
|
48 |
+
|
49 |
+
import os
|
50 |
+
from langchain.vectorstores import FAISS
|
51 |
+
from huggingface_hub import Repository
|
52 |
+
|
53 |
+
def save_faiss_index_to_hub(vectorstore, repo_id="MohammedNasser/faiss-index"):
    """Save the FAISS index to disk and upload it to a Hugging Face Hub repo.

    Args:
        vectorstore: Object exposing ``save_local(folder)``; its files are
            written to the local ``faiss_index`` directory.
        repo_id: Hub repository that receives the index files.

    Note:
        Presumably the target repo already exists and a write token is
        configured in the environment -- TODO confirm for the Space.
    """
    # Imported locally so this block does not depend on edits to the
    # file-level import list.
    from huggingface_hub import upload_folder

    index_dir = "faiss_index"

    # Ensure the index directory exists
    os.makedirs(index_dir, exist_ok=True)

    # Save FAISS index locally
    vectorstore.save_local(index_dir)

    # Upload over HTTP instead of cloning with the git-based ``Repository``
    # helper: by the time the clone ran, the directory already held the
    # freshly saved index files, and cloning into a non-empty folder that is
    # not a git repository is rejected.
    upload_folder(
        repo_id=repo_id,
        folder_path=index_dir,
        commit_message="Pushing FAISS index",
    )

    print(f"FAISS index saved to Hugging Face Hub: {repo_id}")
|
70 |
+
|
71 |
def prepare_vectorstore(data):
|
72 |
index_dir = "faiss_index"
|
73 |
if not os.path.exists(index_dir):
|
|
|
75 |
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=20, separator="\n")
|
76 |
texts = data
|
77 |
vectorstore = FAISS.from_texts(texts, embeddings)
|
78 |
+
save_faiss_index_to_hub(vectorstore)
|
|
|
79 |
return vectorstore
|
80 |
+
|
81 |
|
82 |
+
def load_vectorstore(repo_id="MohammedNasser/faiss-index"):
    """Download the FAISS index from the Hugging Face Hub and load it.

    Args:
        repo_id: Hub repository that holds the saved index files.

    Returns:
        The vector store returned by ``FAISS.load_local`` for the downloaded
        ``faiss_index`` directory, using the module-level ``embeddings``.
    """
    index_dir = "faiss_index"

    # Ensure the index directory exists
    os.makedirs(index_dir, exist_ok=True)

    # Download the FAISS index files from Hugging Face Hub.
    # ``save_local`` writes two files and ``load_local`` reads both:
    # ``index.faiss`` (the raw index) and ``index.pkl`` (the pickled docstore
    # and id mapping). Fetching only the first one breaks loading on a clean
    # machine.
    for filename in ("index.faiss", "index.pkl"):
        hf_hub_download(repo_id=repo_id, filename=filename, local_dir=index_dir)

    # NOTE(security): allow_dangerous_deserialization=True unpickles
    # ``index.pkl`` fetched from a remote repo; only point ``repo_id`` at a
    # repository whose contents you control.
    vectorstore = FAISS.load_local(index_dir, embeddings, allow_dangerous_deserialization=True)
    return vectorstore
|