Spaces:

NaimaAqeel
/

Chatbot

Runtime error

App Files Files Community

NaimaAqeel committed on Jun 4, 2024

Commit

47ecda0

verified ·

1 Parent(s): d7100c1

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -20

app.py CHANGED Viewed

@@ -7,6 +7,9 @@ import numpy as np
 import pickle
 import gradio as gr
 from typing import List
 # Function to extract text from a PDF file
 def extract_text_from_pdf(pdf_path):
@@ -31,29 +34,26 @@ api_token = os.getenv('HUGGINGFACEHUB_API_TOKEN')
 if not api_token:
     raise ValueError("HUGGINGFACEHUB_API_TOKEN environment variable is not set")
 # Initialize the HuggingFace embeddings
-embedding = SentenceTransformer('all-MiniLM-L6-v2')
 # Load or create FAISS index
 index_path = "faiss_index.pkl"
-document_texts_path = "document_texts.pkl"
 if os.path.exists(index_path):
     with open(index_path, "rb") as f:
-        index = pickle.load(f)
 else:
     # Create a new FAISS index if it doesn't exist
-    index = faiss.IndexFlatL2(embedding.get_sentence_embedding_dimension())
-    with open(index_path, "wb") as f:
-        pickle.dump(index, f)
-if os.path.exists(document_texts_path):
-    with open(document_texts_path, "rb") as f:
-        document_texts = pickle.load(f)
-else:
     document_texts = []
-    with open(document_texts_path, "wb") as f:
-        pickle.dump(document_texts, f)
 def upload_files(files):
     global index, document_texts
@@ -68,25 +68,23 @@ def upload_files(files):
                 f.write(content)
             text = extract_text_from_docx("temp.docx")
         else:
-            return {"error": "Unsupported file format"}
         # Process the text and update FAISS index
         sentences = text.split("\n")
-        embeddings = embedding.encode(sentences)
         index.add(np.array(embeddings))
         document_texts.append(text)
     # Save the updated index and documents
     with open(index_path, "wb") as f:
-        pickle.dump(index, f)
-    with open(document_texts_path, "wb") as f:
-        pickle.dump(document_texts, f)
     return "Files processed successfully"
 def query_text(text):
     # Encode the query text
-    query_embedding = embedding.encode([text])
     # Search the FAISS index
     D, I = index.search(np.array(query_embedding), k=5)
@@ -116,6 +114,9 @@ with gr.Blocks() as demo:
 demo.launch()

 import pickle
 import gradio as gr
 from typing import List
+from langchain_community.llms import HuggingFaceEndpoint
+from langchain_community.vectorstores import FAISS
+from langchain_community.embeddings import HuggingFaceEmbeddings
 # Function to extract text from a PDF file
 def extract_text_from_pdf(pdf_path):
 if not api_token:
     raise ValueError("HUGGINGFACEHUB_API_TOKEN environment variable is not set")
+# Initialize the HuggingFace LLM
+llm = HuggingFaceEndpoint(
+    endpoint_url="https://api-inference.huggingface.co/models/gpt2",
+    model_kwargs={"api_key": api_token}
+)
 # Initialize the HuggingFace embeddings
+embedding = HuggingFaceEmbeddings()
 # Load or create FAISS index
 index_path = "faiss_index.pkl"
 if os.path.exists(index_path):
     with open(index_path, "rb") as f:
+        index, document_texts = pickle.load(f)
 else:
     # Create a new FAISS index if it doesn't exist
+    index = faiss.IndexFlatL2(embedding_model.get_sentence_embedding_dimension())
     document_texts = []
+    with open(index_path, "wb") as f:
+        pickle.dump((index, document_texts), f)
 def upload_files(files):
     global index, document_texts
                 f.write(content)
             text = extract_text_from_docx("temp.docx")
         else:
+            return "Unsupported file format"
         # Process the text and update FAISS index
         sentences = text.split("\n")
+        embeddings = embedding_model.encode(sentences)
         index.add(np.array(embeddings))
         document_texts.append(text)
     # Save the updated index and documents
     with open(index_path, "wb") as f:
+        pickle.dump((index, document_texts), f)
     return "Files processed successfully"
 def query_text(text):
     # Encode the query text
+    query_embedding = embedding_model.encode([text])
     # Search the FAISS index
     D, I = index.search(np.array(query_embedding), k=5)
 demo.launch()