Spaces:

FridayMaster
/

CHATBOT1

Sleeping

App Files Files Community

FridayMaster committed on Aug 12, 2024

Commit

0bdc9aa

verified ·

1 Parent(s): 25ba997

Update app.py

Browse files

Files changed (1) hide show

app.py +41 -26

app.py CHANGED Viewed

@@ -8,13 +8,14 @@ import nltk
 # Download the required NLTK data
 nltk.download('punkt')
-nltk.download('punkt_tab')
 # Load the Ubuntu manual from a .txt file
 with open("ubuntu_manual.txt", "r", encoding="utf-8") as file:
     full_text = file.read()
 # Function to chunk the text into smaller pieces
 def chunk_text(text, chunk_size=500):  # Larger chunks
@@ -38,7 +39,11 @@ def chunk_text(text, chunk_size=500):  # Larger chunks
 manual_chunks = chunk_text(full_text, chunk_size=500)
 # Load your FAISS index
-index = faiss.read_index("manual_chunked_faiss_index_500.bin")
 # Load your embedding model
 embedding_model = SentenceTransformer('FridayMaster/fine_tune_embedding')
@@ -46,6 +51,7 @@ embedding_model = SentenceTransformer('FridayMaster/fine_tune_embedding')
 # OpenAI API key
 openai.api_key = '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'
 # Function to create embeddings
 def embed_text(text_list):
     return np.array(embedding_model.encode(text_list), dtype=np.float32)
@@ -55,11 +61,10 @@ def retrieve_chunks(query, k=5):
     query_embedding = embed_text([query])
     # Search the FAISS index
-    distances, indices = index.search(query_embedding, k=k)
-    # Debugging: Print out the distances and indices
-    print("Distances:", distances)
-    print("Indices:", indices)
     # Check if indices are valid
     if len(indices[0]) == 0:
@@ -81,23 +86,26 @@ def truncate_input(text, max_length=512):
 # Function to perform RAG: Retrieve chunks and generate a response
 def rag_response(query, k=5, max_new_tokens=150):
-    # Step 1: Retrieve relevant chunks
-    relevant_chunks = retrieve_chunks(query, k=k)
-    if not relevant_chunks:
-        return "Sorry, I couldn't find relevant information."
-    # Step 2: Combine the query with retrieved chunks
-    augmented_input = query + "\n" + "\n".join(relevant_chunks)
-    # Truncate and encode the input
-    inputs = truncate_input(augmented_input)
-    # Generate response
-    outputs = generator_model.generate(inputs, max_new_tokens=max_new_tokens)
-    generated_text = generator_tokenizer.decode(outputs[0], skip_special_tokens=True)
-    return generated_text
 # Gradio Interface
 iface = gr.Interface(
@@ -111,3 +119,10 @@ iface = gr.Interface(
 if __name__ == "__main__":
     iface.launch()

 # Download the required NLTK data
 nltk.download('punkt')
 # Load the Ubuntu manual from a .txt file
+try:
+    # Load the Ubuntu manual from a .txt file
 with open("ubuntu_manual.txt", "r", encoding="utf-8") as file:
     full_text = file.read()
+except FileNotFoundError:
+    raise FileNotFoundError("The file /content/ubuntu_manual.txt was not found.")
 # Function to chunk the text into smaller pieces
 def chunk_text(text, chunk_size=500):  # Larger chunks
 manual_chunks = chunk_text(full_text, chunk_size=500)
 # Load your FAISS index
+try:
+    # Load your FAISS index
+    index = faiss.read_index("manual_chunked_faiss_index_500.bin")
+except Exception as e:
+    raise RuntimeError(f"Failed to load FAISS index: {e}")
 # Load your embedding model
 embedding_model = SentenceTransformer('FridayMaster/fine_tune_embedding')
 # OpenAI API key
 openai.api_key = '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'
 # Function to create embeddings
 def embed_text(text_list):
     return np.array(embedding_model.encode(text_list), dtype=np.float32)
     query_embedding = embed_text([query])
     # Search the FAISS index
+    try:
+        distances, indices = index.search(query_embedding, k=k)
+    except Exception as e:
+        raise RuntimeError(f"FAISS search failed: {e}")
     # Check if indices are valid
     if len(indices[0]) == 0:
 # Function to perform RAG: Retrieve chunks and generate a response
 def rag_response(query, k=5, max_new_tokens=150):
+    try:
+        # Step 1: Retrieve relevant chunks
+        relevant_chunks = retrieve_chunks(query, k=k)
+        if not relevant_chunks:
+            return "Sorry, I couldn't find relevant information."
+        # Step 2: Combine the query with retrieved chunks
+        augmented_input = query + "\n" + "\n".join(relevant_chunks)
+        # Truncate and encode the input
+        inputs = truncate_input(augmented_input)
+        # Generate response
+        outputs = generator_model.generate(inputs, max_new_tokens=max_new_tokens)
+        generated_text = generator_tokenizer.decode(outputs[0], skip_special_tokens=True)
+        return generated_text
+    except Exception as e:
+        return f"An error occurred: {e}"
 # Gradio Interface
 iface = gr.Interface(
 if __name__ == "__main__":
     iface.launch()