Spaces:

NaimaAqeel
/

Chatbot

Runtime error

App Files Community

NaimaAqeel committed on Jun 3, 2024

Commit

8ceb607

verified ·

1 Parent(s): 5ddc1bc

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -25

app.py CHANGED Viewed

@@ -5,14 +5,8 @@ from sentence_transformers import SentenceTransformer
 import faiss
 import numpy as np
 import pickle
-from langchain_community.llms import HuggingFaceEndpoint
-from langchain_community.vectorstores import FAISS
-from langchain_community.embeddings import HuggingFaceEmbeddings
 import gradio as gr
-from fastapi import FastAPI
-# Initialize FastAPI
-app = FastAPI()
 # Function to extract text from a PDF file
 def extract_text_from_pdf(pdf_path):
@@ -37,29 +31,29 @@ api_token = os.getenv('HUGGINGFACEHUB_API_TOKEN')
 if not api_token:
     raise ValueError("HUGGINGFACEHUB_API_TOKEN environment variable is not set")
-print(f"API Token: {api_token[:5]}...")
-# Initialize the HuggingFace LLM
-llm = HuggingFaceEndpoint(
-    endpoint_url="https://api-inference.huggingface.co/models/gpt2",
-    model_kwargs={"api_key": api_token}
-)
 # Initialize the HuggingFace embeddings
-embedding = HuggingFaceEmbeddings()
 # Load or create FAISS index
 index_path = "faiss_index.pkl"
 if os.path.exists(index_path):
     with open(index_path, "rb") as f:
         index = pickle.load(f)
 else:
     # Create a new FAISS index if it doesn't exist
-    index = faiss.IndexFlatL2(embedding_model.get_sentence_embedding_dimension())
     with open(index_path, "wb") as f:
         pickle.dump(index, f)
 def upload_files(files):
     for file in files:
         content = file.read()
         if file.name.endswith('.pdf'):
@@ -75,26 +69,29 @@ def upload_files(files):
         # Process the text and update FAISS index
         sentences = text.split("\n")
-        embeddings = embedding_model.encode(sentences)
         index.add(np.array(embeddings))
-    # Save the updated index
     with open(index_path, "wb") as f:
         pickle.dump(index, f)
     return "Files processed successfully"
 def query_text(text):
     # Encode the query text
-    query_embedding = embedding_model.encode([text])
     # Search the FAISS index
     D, I = index.search(np.array(query_embedding), k=5)
     top_documents = []
     for idx in I[0]:
-        if idx != -1:  # Ensure that a valid index is found
-            top_documents.append(f"Document {idx}")
     return top_documents
@@ -116,9 +113,6 @@ with gr.Blocks() as demo:
 demo.launch()
-if __name__ == "__main__":
-    import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=8001)

 import faiss
 import numpy as np
 import pickle
 import gradio as gr
+from typing import List
 # Function to extract text from a PDF file
 def extract_text_from_pdf(pdf_path):
 if not api_token:
     raise ValueError("HUGGINGFACEHUB_API_TOKEN environment variable is not set")
 # Initialize the HuggingFace embeddings
+embedding = SentenceTransformer('all-MiniLM-L6-v2')
 # Load or create FAISS index
 index_path = "faiss_index.pkl"
+document_texts_path = "document_texts.pkl"
 if os.path.exists(index_path):
     with open(index_path, "rb") as f:
         index = pickle.load(f)
+    with open(document_texts_path, "rb") as f:
+        document_texts = pickle.load(f)
 else:
     # Create a new FAISS index if it doesn't exist
+    index = faiss.IndexFlatL2(embedding.get_sentence_embedding_dimension())
+    document_texts = []
     with open(index_path, "wb") as f:
         pickle.dump(index, f)
+    with open(document_texts_path, "wb") as f:
+        pickle.dump(document_texts, f)
 def upload_files(files):
+    global index, document_texts
     for file in files:
         content = file.read()
         if file.name.endswith('.pdf'):
         # Process the text and update FAISS index
         sentences = text.split("\n")
+        embeddings = embedding.encode(sentences)
         index.add(np.array(embeddings))
+        document_texts.append(text)
+    # Save the updated index and documents
     with open(index_path, "wb") as f:
         pickle.dump(index, f)
+    with open(document_texts_path, "wb") as f:
+        pickle.dump(document_texts, f)
     return "Files processed successfully"
 def query_text(text):
     # Encode the query text
+    query_embedding = embedding.encode([text])
     # Search the FAISS index
     D, I = index.search(np.array(query_embedding), k=5)
     top_documents = []
     for idx in I[0]:
+        if idx != -1 and idx < len(document_texts):  # Ensure that a valid index is found
+            top_documents.append(document_texts[idx])
     return top_documents
 demo.launch()