Hammad712 committed on
Commit
41088d6
·
verified ·
1 Parent(s): 376d7c4

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +30 -33
main.py CHANGED
@@ -1,7 +1,6 @@
1
  import os
2
  import zipfile
3
  import tempfile
4
-
5
  from fastapi import FastAPI, HTTPException
6
  from pydantic import BaseModel
7
 
@@ -25,27 +24,37 @@ class QueryRequest(BaseModel):
25
  question: str
26
 
27
 
28
- def _unpack_faiss(src_path: str, extract_to: str) -> str:
29
  """
30
- If src_path is a valid .zip archive, unzip it into extract_to and
31
- return the subdirectory that contains the .faiss index.
32
- If src_path is already a directory, return it directly.
33
  """
34
- # 1) True ZIP file?
35
  if zipfile.is_zipfile(src_path):
 
36
  with zipfile.ZipFile(src_path, "r") as zf:
37
- zf.extractall(extract_to)
38
- # scan until we find any .faiss file
39
- for root, _, files in os.walk(extract_to):
40
  if any(f.endswith(".faiss") for f in files):
41
  return root
42
  raise RuntimeError(f"No .faiss index found inside ZIP: {src_path}")
43
-
44
- # 2) Already a folder?
45
- if os.path.isdir(src_path):
46
  return src_path
 
 
 
 
 
 
 
 
 
 
 
47
 
48
- raise RuntimeError(f"Path is neither a valid ZIP nor a directory: {src_path}")
 
 
 
49
 
50
 
51
  @app.on_event("startup")
@@ -57,7 +66,7 @@ def load_components():
57
  model="meta-llama/llama-4-scout-17b-16e-instruct",
58
  temperature=0,
59
  max_tokens=1024,
60
- api_key=os.getenv("api_key"),
61
  )
62
  embeddings = HuggingFaceEmbeddings(
63
  model_name="intfloat/multilingual-e5-large",
@@ -66,29 +75,17 @@ def load_components():
66
  )
67
 
68
  # --- 2) Load & merge two FAISS indexes ---
69
- # (these can be either real .zip files or existing folders)
70
- src1 = "faiss_index.zip" # or "faiss_index" if it's already a folder
71
- src2 = "faiss_index_extra.zip" # or "faiss_index_extra"
72
-
73
- tmp1 = tempfile.TemporaryDirectory()
74
- tmp2 = tempfile.TemporaryDirectory()
75
-
76
- dir1 = _unpack_faiss(src1, tmp1.name)
77
- dir2 = _unpack_faiss(src2, tmp2.name)
78
-
79
- vs1 = FAISS.load_local(dir1, embeddings, allow_dangerous_deserialization=True)
80
- vs2 = FAISS.load_local(dir2, embeddings, allow_dangerous_deserialization=True)
81
-
82
- vs1.merge_from(vs2)
83
- vectorstore = vs1
84
 
85
  # --- 3) Build retriever & QA chain ---
86
  retriever = vectorstore.as_retriever(search_kwargs={"k": 5})
87
  prompt = PromptTemplate(
88
  template="""
89
- You are an expert assistant on Islamic knowledge.
90
- Use **only** the information in the “Retrieved context” to answer the user’s question.
91
- Do **not** add any outside information, personal opinions, or conjecture—if the answer is not contained in the context, reply with “لا أعلم”.
92
  Be concise, accurate, and directly address the user’s question.
93
 
94
  Retrieved context:
@@ -114,7 +111,7 @@ Your response:
114
 
115
  @app.get("/")
116
  def root():
117
- return {"message": "Arabic Hadith Finder API is up..."}
118
 
119
 
120
  @app.post("/query")
 
1
  import os
2
  import zipfile
3
  import tempfile
 
4
  from fastapi import FastAPI, HTTPException
5
  from pydantic import BaseModel
6
 
 
24
  question: str
25
 
26
 
27
def _unpack_faiss(src_path: str) -> str:
    """
    Resolve a FAISS index location to a directory on disk.

    Args:
        src_path: Path to either a ZIP archive containing a ``.faiss`` file
            or an existing directory.

    Returns:
        If ``src_path`` is a ZIP archive: the first directory (in
        ``os.walk`` order) of the extracted tree that contains a file
        ending in ``.faiss``. If ``src_path`` is a directory: ``src_path``
        unchanged.

    Raises:
        RuntimeError: If the archive contains no ``.faiss`` file, or if
            ``src_path`` is neither a valid ZIP archive nor a directory.
    """
    # Function-scope imports keep this block self-contained; only the
    # ZIP branch needs them but importing is cheap.
    import atexit
    import shutil

    if zipfile.is_zipfile(src_path):
        # Use mkdtemp() rather than TemporaryDirectory(): a
        # TemporaryDirectory object held only in a local variable is
        # finalized as soon as this function returns, which deletes the
        # extracted files before the caller can read the returned path.
        extract_dir = tempfile.mkdtemp(prefix="faiss_")
        with zipfile.ZipFile(src_path, "r") as zf:
            zf.extractall(extract_dir)

        for root, _, files in os.walk(extract_dir):
            if any(f.endswith(".faiss") for f in files):
                # The directory must outlive this call, so defer removal
                # until the interpreter shuts down.
                atexit.register(shutil.rmtree, extract_dir, ignore_errors=True)
                return root

        # Nothing usable was extracted; do not leave the temp dir behind.
        shutil.rmtree(extract_dir, ignore_errors=True)
        raise RuntimeError(f"No .faiss index found inside ZIP: {src_path}")
    elif os.path.isdir(src_path):
        return src_path
    else:
        raise RuntimeError(f"Path is neither a valid ZIP nor a directory: {src_path}")
44
+
45
+
46
def load_and_merge_faiss(path1: str, path2: str, embeddings: HuggingFaceEmbeddings) -> FAISS:
    """
    Build a single vectorstore out of two FAISS index locations.

    Each path is first resolved to a directory with ``_unpack_faiss`` (so it
    may be a ZIP archive or a folder), then loaded with ``FAISS.load_local``
    using the supplied embeddings. The second store is merged into the
    first, and the first store is returned.

    NOTE: ``allow_dangerous_deserialization=True`` is passed to
    ``load_local``; only point this at index files from a trusted source.
    """
    # Resolve both locations before loading anything, path1 first.
    index_dirs = [_unpack_faiss(location) for location in (path1, path2)]

    primary, secondary = (
        FAISS.load_local(folder, embeddings, allow_dangerous_deserialization=True)
        for folder in index_dirs
    )

    primary.merge_from(secondary)
    return primary
58
 
59
 
60
  @app.on_event("startup")
 
66
  model="meta-llama/llama-4-scout-17b-16e-instruct",
67
  temperature=0,
68
  max_tokens=1024,
69
+ api_key=os.getenv("API_KEY"),
70
  )
71
  embeddings = HuggingFaceEmbeddings(
72
  model_name="intfloat/multilingual-e5-large",
 
75
  )
76
 
77
  # --- 2) Load & merge two FAISS indexes ---
78
+ src1 = os.getenv("FAISS_INDEX_PATH_1", "faiss_index.zip")
79
+ src2 = os.getenv("FAISS_INDEX_PATH_2", "faiss_index_extra.zip")
80
+ vectorstore = load_and_merge_faiss(src1, src2, embeddings)
 
 
 
 
 
 
 
 
 
 
 
 
81
 
82
  # --- 3) Build retriever & QA chain ---
83
  retriever = vectorstore.as_retriever(search_kwargs={"k": 5})
84
  prompt = PromptTemplate(
85
  template="""
86
+ You are an expert assistant on Islamic knowledge.
87
+ Use **only** the information in the “Retrieved context” to answer the user’s question.
88
+ Do **not** add any outside information, personal opinions, or conjecture—if the answer is not contained in the context, reply with “لا أعلم”.
89
  Be concise, accurate, and directly address the user’s question.
90
 
91
  Retrieved context:
 
111
 
112
  @app.get("/")
113
  def root():
114
+ return {"message": "Arabic Hadith Finder API is up and running!"}
115
 
116
 
117
  @app.post("/query")