MohammedNasser committed on
Commit
c2777d8
1 Parent(s): 1511464

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -5
app.py CHANGED
@@ -14,6 +14,10 @@ from gtts import gTTS
14
  import sys
15
  import pytesseract
16
  from pdf2image import convert_from_path
 
 
 
 
17
 
18
 
19
  # Load environment variables
@@ -41,6 +45,29 @@ def load_pdf(file_path):
41
  documents.append(text)
42
  return documents
43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  def prepare_vectorstore(data):
45
  index_dir = "faiss_index"
46
  if not os.path.exists(index_dir):
@@ -48,16 +75,21 @@ def prepare_vectorstore(data):
48
  text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=20, separator="\n")
49
  texts = data
50
  vectorstore = FAISS.from_texts(texts, embeddings)
51
- vectorstore.save_local(index_dir)
52
-
53
  return vectorstore
 
54
 
55
- def load_vectorstore():
56
  index_dir = "faiss_index"
57
 
58
- # Ensure the directory exists before trying to load the index
59
  if not os.path.exists(index_dir):
60
- raise RuntimeError(f"FAISS index directory '{index_dir}' does not exist.")
 
 
 
 
 
61
 
62
  vectorstore = FAISS.load_local(index_dir, embeddings, allow_dangerous_deserialization=True)
63
  return vectorstore
 
14
  import sys
15
  import pytesseract
16
  from pdf2image import convert_from_path
17
+ from huggingface_hub import Repository
18
+ from huggingface_hub import hf_hub_download
19
+
20
+
21
 
22
 
23
  # Load environment variables
 
45
  documents.append(text)
46
  return documents
47
 
48
+
49
+ import os
50
+ from langchain.vectorstores import FAISS
51
+ from huggingface_hub import Repository
52
+
53
def save_faiss_index_to_hub(vectorstore, repo_id="MohammedNasser/faiss-index"):
    """Save the FAISS index locally and upload it to the Hugging Face Hub.

    Args:
        vectorstore: Vector store exposing ``save_local(folder_path)``.
        repo_id: Hub repository that receives the index files. The
            repository must already exist and the configured token must
            have write access to it.
    """
    # Function-scope import so this block is self-contained; the file
    # already depends on huggingface_hub.
    from huggingface_hub import HfApi

    index_dir = "faiss_index"

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(index_dir, exist_ok=True)

    # Save FAISS index locally
    vectorstore.save_local(index_dir)

    # Upload the saved folder over HTTP. The previous approach cloned the
    # repository into index_dir *after* the index had been written there;
    # cloning into a non-empty directory that is not a git checkout fails,
    # so the push was never reached.
    HfApi().upload_folder(
        folder_path=index_dir,
        repo_id=repo_id,
        commit_message="Pushing FAISS index",
    )

    print(f"FAISS index saved to Hugging Face Hub: {repo_id}")
70
+
71
  def prepare_vectorstore(data):
72
  index_dir = "faiss_index"
73
  if not os.path.exists(index_dir):
 
75
  text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=20, separator="\n")
76
  texts = data
77
  vectorstore = FAISS.from_texts(texts, embeddings)
78
+ save_faiss_index_to_hub(vectorstore)
 
79
  return vectorstore
80
+
81
 
82
def load_vectorstore(repo_id="MohammedNasser/faiss-index"):
    """Download the FAISS index from the Hugging Face Hub and load it.

    Args:
        repo_id: Hub repository holding the saved index files.

    Returns:
        The vector store loaded from the local ``faiss_index`` directory,
        using the module-level ``embeddings`` object.
    """
    index_dir = "faiss_index"

    # Ensure the index directory exists (no-op when it is already there).
    os.makedirs(index_dir, exist_ok=True)

    # FAISS.load_local needs both files produced by save_local: the raw
    # index ("index.faiss") and the pickled docstore/id mapping
    # ("index.pkl"). Fetching only the first makes the load fail.
    for filename in ("index.faiss", "index.pkl"):
        hf_hub_download(repo_id=repo_id, filename=filename, local_dir=index_dir)

    # NOTE(security): allow_dangerous_deserialization=True unpickles
    # index.pkl, which was just downloaded from a remote repository. Only
    # point repo_id at a repository whose contents you control.
    vectorstore = FAISS.load_local(index_dir, embeddings, allow_dangerous_deserialization=True)
    return vectorstore