Moha782 committed on
Commit
38a5c45
·
verified ·
1 Parent(s): 3e55561

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -4
app.py CHANGED
@@ -7,12 +7,14 @@ import numpy as np
7
  from huggingface_hub import InferenceClient
8
  from sentence_transformers import SentenceTransformer
9
 
10
-
11
-
12
-
13
  # Extract text from PDF
14
  def extract_text_from_pdf(pdf_path):
15
  doc = fitz.open(pdf_path)
 
 
 
 
 
16
 
17
  # Build FAISS index
18
  def build_faiss_index(documents):
@@ -39,9 +41,15 @@ else:
39
 
40
  # Hugging Face client
41
  client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 
 
 
 
42
  return [documents[i] for i in indices[0]]
43
 
44
- def respond(message, history, system_message, max_tokens, temperature, top_p):
 
 
45
  context = "\n\n".join(relevant_docs[:3]) # Limit context to top 3 documents
46
 
47
  # Limit history to the last 5 exchanges to reduce payload size
 
7
  from huggingface_hub import InferenceClient
8
  from sentence_transformers import SentenceTransformer
9
 
 
 
 
10
  # Extract text from PDF
11
  def extract_text_from_pdf(pdf_path):
12
  doc = fitz.open(pdf_path)
13
+ text = ""
14
+ for page_num in range(doc.page_count):
15
+ page = doc.load_page(page_num)
16
+ text += page.get_text()
17
+ return text.split("\n\n")
18
 
19
  # Build FAISS index
20
  def build_faiss_index(documents):
 
41
 
42
  # Hugging Face client
43
  client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
44
+
45
def retrieve_documents(query, k=5):
    """Return the documents whose embeddings are nearest to *query*.

    Relies on the module-level ``model``, ``index`` and ``documents``
    objects defined elsewhere in this file.

    Args:
        query: The text to search for; it is encoded with ``model.encode``.
        k: Maximum number of neighbours requested from ``index.search``.

    Returns:
        A list of at most ``k`` entries of ``documents``, in the order the
        index returned them.
    """
    query_embedding = model.encode([query])
    _, indices = index.search(query_embedding, k)
    # FAISS pads the label row with -1 when it finds fewer than k
    # neighbours; without this filter documents[-1] would silently return
    # the last document as a bogus hit.
    return [documents[i] for i in indices[0] if i >= 0]
49
 
50
+ async def respond(message, history, system_message, max_tokens, temperature, top_p):
51
+ relevant_docs = retrieve_documents(message)
52
+
53
  context = "\n\n".join(relevant_docs[:3]) # Limit context to top 3 documents
54
 
55
  # Limit history to the last 5 exchanges to reduce payload size