NaimaAqeel commited on
Commit
47ecda0
·
verified ·
1 Parent(s): d7100c1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -20
app.py CHANGED
@@ -7,6 +7,9 @@ import numpy as np
7
  import pickle
8
  import gradio as gr
9
  from typing import List
 
 
 
10
 
11
  # Function to extract text from a PDF file
12
  def extract_text_from_pdf(pdf_path):
@@ -31,29 +34,26 @@ api_token = os.getenv('HUGGINGFACEHUB_API_TOKEN')
31
  if not api_token:
32
  raise ValueError("HUGGINGFACEHUB_API_TOKEN environment variable is not set")
33
 
 
 
 
 
 
 
34
  # Initialize the HuggingFace embeddings
35
- embedding = SentenceTransformer('all-MiniLM-L6-v2')
36
 
37
  # Load or create FAISS index
38
  index_path = "faiss_index.pkl"
39
- document_texts_path = "document_texts.pkl"
40
-
41
  if os.path.exists(index_path):
42
  with open(index_path, "rb") as f:
43
- index = pickle.load(f)
44
  else:
45
  # Create a new FAISS index if it doesn't exist
46
- index = faiss.IndexFlatL2(embedding.get_sentence_embedding_dimension())
47
- with open(index_path, "wb") as f:
48
- pickle.dump(index, f)
49
-
50
- if os.path.exists(document_texts_path):
51
- with open(document_texts_path, "rb") as f:
52
- document_texts = pickle.load(f)
53
- else:
54
  document_texts = []
55
- with open(document_texts_path, "wb") as f:
56
- pickle.dump(document_texts, f)
57
 
58
  def upload_files(files):
59
  global index, document_texts
@@ -68,25 +68,23 @@ def upload_files(files):
68
  f.write(content)
69
  text = extract_text_from_docx("temp.docx")
70
  else:
71
- return {"error": "Unsupported file format"}
72
 
73
  # Process the text and update FAISS index
74
  sentences = text.split("\n")
75
- embeddings = embedding.encode(sentences)
76
  index.add(np.array(embeddings))
77
  document_texts.append(text)
78
 
79
  # Save the updated index and documents
80
  with open(index_path, "wb") as f:
81
- pickle.dump(index, f)
82
- with open(document_texts_path, "wb") as f:
83
- pickle.dump(document_texts, f)
84
 
85
  return "Files processed successfully"
86
 
87
  def query_text(text):
88
  # Encode the query text
89
- query_embedding = embedding.encode([text])
90
 
91
  # Search the FAISS index
92
  D, I = index.search(np.array(query_embedding), k=5)
@@ -116,6 +114,9 @@ with gr.Blocks() as demo:
116
 
117
  demo.launch()
118
 
 
 
 
119
 
120
 
121
 
 
7
  import pickle
8
  import gradio as gr
9
  from typing import List
10
+ from langchain_community.llms import HuggingFaceEndpoint
11
+ from langchain_community.vectorstores import FAISS
12
+ from langchain_community.embeddings import HuggingFaceEmbeddings
13
 
14
  # Function to extract text from a PDF file
15
  def extract_text_from_pdf(pdf_path):
 
34
  if not api_token:
35
  raise ValueError("HUGGINGFACEHUB_API_TOKEN environment variable is not set")
36
 
37
# LLM served through the Hugging Face Inference API (gpt2 endpoint).
# NOTE(review): passing the token as model_kwargs={"api_key": ...} looks
# suspect — HuggingFaceEndpoint conventionally takes a
# huggingfacehub_api_token argument; confirm against the installed
# langchain_community version before relying on authenticated calls.
llm = HuggingFaceEndpoint(
    endpoint_url="https://api-inference.huggingface.co/models/gpt2",
    model_kwargs={"api_key": api_token},
)

# Embedding model used to vectorize both uploaded documents and queries.
embedding = HuggingFaceEmbeddings()
45
 
46
# Load the persisted FAISS index + document texts, or create fresh ones.
# The pickle stores a (index, document_texts) tuple so both stay in sync.
index_path = "faiss_index.pkl"
if os.path.exists(index_path):
    with open(index_path, "rb") as f:
        # NOTE(review): unpickling is only safe because this file is
        # produced locally by this app; never load untrusted pickles.
        index, document_texts = pickle.load(f)
else:
    # Bug fix: the original referenced `embedding_model`, which is never
    # defined in this file (the embeddings object is named `embedding`),
    # so first-run initialization raised NameError. HuggingFaceEmbeddings
    # also has no get_sentence_embedding_dimension() (that was a
    # SentenceTransformer method); derive the dimension from a real
    # embedding instead.
    dimension = len(embedding.embed_query("dimension probe"))
    index = faiss.IndexFlatL2(dimension)
    document_texts = []
    # Persist the empty index immediately so later loads succeed even if
    # no document is ever uploaded.
    with open(index_path, "wb") as f:
        pickle.dump((index, document_texts), f)
57
 
58
  def upload_files(files):
59
  global index, document_texts
 
68
  f.write(content)
69
  text = extract_text_from_docx("temp.docx")
70
  else:
71
+ return "Unsupported file format"
72
 
73
  # Process the text and update FAISS index
74
  sentences = text.split("\n")
75
+ embeddings = embedding_model.encode(sentences)
76
  index.add(np.array(embeddings))
77
  document_texts.append(text)
78
 
79
  # Save the updated index and documents
80
  with open(index_path, "wb") as f:
81
+ pickle.dump((index, document_texts), f)
 
 
82
 
83
  return "Files processed successfully"
84
 
85
  def query_text(text):
86
  # Encode the query text
87
+ query_embedding = embedding_model.encode([text])
88
 
89
  # Search the FAISS index
90
  D, I = index.search(np.array(query_embedding), k=5)
 
114
 
115
  demo.launch()
116
 
117
+
118
+
119
+
120
 
121
 
122