NaimaAqeel committed (verified)
Commit 8ceb607 · 1 parent: 5ddc1bc

Update app.py

Files changed (1): app.py (+19 -25)
app.py CHANGED
@@ -5,14 +5,8 @@ from sentence_transformers import SentenceTransformer
 import faiss
 import numpy as np
 import pickle
-from langchain_community.llms import HuggingFaceEndpoint
-from langchain_community.vectorstores import FAISS
-from langchain_community.embeddings import HuggingFaceEmbeddings
 import gradio as gr
-from fastapi import FastAPI
-
-# Initialize FastAPI
-app = FastAPI()
+from typing import List
 
 # Function to extract text from a PDF file
 def extract_text_from_pdf(pdf_path):
@@ -37,29 +31,29 @@ api_token = os.getenv('HUGGINGFACEHUB_API_TOKEN')
 if not api_token:
     raise ValueError("HUGGINGFACEHUB_API_TOKEN environment variable is not set")
 
-print(f"API Token: {api_token[:5]}...")
-
-# Initialize the HuggingFace LLM
-llm = HuggingFaceEndpoint(
-    endpoint_url="https://api-inference.huggingface.co/models/gpt2",
-    model_kwargs={"api_key": api_token}
-)
-
 # Initialize the HuggingFace embeddings
-embedding = HuggingFaceEmbeddings()
+embedding = SentenceTransformer('all-MiniLM-L6-v2')
 
 # Load or create FAISS index
 index_path = "faiss_index.pkl"
+document_texts_path = "document_texts.pkl"
+
 if os.path.exists(index_path):
     with open(index_path, "rb") as f:
         index = pickle.load(f)
+    with open(document_texts_path, "rb") as f:
+        document_texts = pickle.load(f)
 else:
     # Create a new FAISS index if it doesn't exist
-    index = faiss.IndexFlatL2(embedding_model.get_sentence_embedding_dimension())
+    index = faiss.IndexFlatL2(embedding.get_sentence_embedding_dimension())
+    document_texts = []
     with open(index_path, "wb") as f:
         pickle.dump(index, f)
+    with open(document_texts_path, "wb") as f:
+        pickle.dump(document_texts, f)
 
 def upload_files(files):
+    global index, document_texts
     for file in files:
         content = file.read()
         if file.name.endswith('.pdf'):
@@ -75,26 +69,29 @@ def upload_files(files):
 
         # Process the text and update FAISS index
         sentences = text.split("\n")
-        embeddings = embedding_model.encode(sentences)
+        embeddings = embedding.encode(sentences)
         index.add(np.array(embeddings))
+        document_texts.append(text)
 
-        # Save the updated index
+        # Save the updated index and documents
        with open(index_path, "wb") as f:
             pickle.dump(index, f)
+        with open(document_texts_path, "wb") as f:
+            pickle.dump(document_texts, f)
 
     return "Files processed successfully"
 
 def query_text(text):
     # Encode the query text
-    query_embedding = embedding_model.encode([text])
+    query_embedding = embedding.encode([text])
 
     # Search the FAISS index
     D, I = index.search(np.array(query_embedding), k=5)
 
     top_documents = []
     for idx in I[0]:
-        if idx != -1:  # Ensure that a valid index is found
-            top_documents.append(f"Document {idx}")
+        if idx != -1 and idx < len(document_texts):  # Ensure that a valid index is found
+            top_documents.append(document_texts[idx])
 
     return top_documents
 
@@ -116,9 +113,6 @@ with gr.Blocks() as demo:
 
 demo.launch()
 
-if __name__ == "__main__":
-    import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=8001)
 
 
 
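
For anyone skimming the hunks, the net effect of this commit is a plain sentence-transformers + FAISS retrieval loop: encode text chunks, add the vectors to a flat L2 index, and translate search hits back into stored text. The sketch below reproduces that loop end to end with the same model and index type as the commit; the two-document corpus is invented for illustration, and one text is stored per indexed vector so that FAISS row ids map one-to-one onto retrievable chunks.

import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

# Same model as the commit; produces 384-dimensional embeddings.
embedding = SentenceTransformer('all-MiniLM-L6-v2')

# Invented sample corpus: one stored text per indexed vector.
document_texts = [
    "IndexFlatL2 performs exact brute-force L2 search.",
    "sentence-transformers maps text to dense vectors.",
]

index = faiss.IndexFlatL2(embedding.get_sentence_embedding_dimension())
index.add(np.asarray(embedding.encode(document_texts), dtype="float32"))

# Query: encode, search, then map the returned row ids back to text.
query_embedding = embedding.encode(["how does FAISS search?"])
D, I = index.search(np.asarray(query_embedding, dtype="float32"), k=2)
top_documents = [document_texts[idx] for idx in I[0] if idx != -1]

Note that upload_files indexes one vector per sentence while appending one entry per file to document_texts, so the ids returned by search and the stored texts can drift apart; keeping the two aligned per chunk, as in the sketch, avoids that.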
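
A side note on persistence: pickling works for a simple in-memory CPU index like IndexFlatL2, but FAISS also ships native serializers. A minimal equivalent, assuming a hypothetical faiss_index.bin filename:

import faiss

# faiss.write_index / faiss.read_index are the library's own
# (de)serialization entry points for index objects.
faiss.write_index(index, "faiss_index.bin")
index = faiss.read_index("faiss_index.bin")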
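
The Blocks layout itself sits outside these hunks (only "with gr.Blocks() as demo:" and "demo.launch()" appear as context), so the wiring below is only a guess at a minimal UI consistent with the two handlers; component names and labels are hypothetical.

import gradio as gr

with gr.Blocks() as demo:
    # Hypothetical layout: the upload side feeds upload_files,
    # the query side feeds query_text.
    file_input = gr.File(file_count="multiple", label="Upload documents")
    upload_status = gr.Textbox(label="Status")
    gr.Button("Process").click(upload_files, inputs=file_input, outputs=upload_status)

    query_box = gr.Textbox(label="Query")
    results = gr.Textbox(label="Top documents")
    gr.Button("Search").click(query_text, inputs=query_box, outputs=results)

demo.launch()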