import os
import gradio as gr
import faiss
import numpy as np
import openai
from sentence_transformers import SentenceTransformer
from nltk.tokenize import sent_tokenize
import nltk
import torch
# Download the required NLTK data
nltk.download('punkt')
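# Note: newer NLTK releases may also need the 'punkt_tab' resource for sent_tokenize;
# if tokenization raises a LookupError, uncommenting the next line should fix it.
# nltk.download('punkt_tab')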
# Paths to your files
faiss_path = "manual_chunked_faiss_index_500.bin"
manual_path = "ubuntu_manual.txt"
# Load the Ubuntu manual from a .txt file
try:
    with open(manual_path, "r", encoding="utf-8") as file:
        full_text = file.read()
except FileNotFoundError:
    raise FileNotFoundError(f"The file {manual_path} was not found.")
# Function to chunk the text into smaller pieces
def chunk_text(text, chunk_size=500):
    """Split the text into chunks of roughly `chunk_size` words, keeping sentences intact."""
    sentences = sent_tokenize(text)
    chunks = []
    current_chunk = []
    current_word_count = 0
    for sentence in sentences:
        sentence_word_count = len(sentence.split())
        # Start a new chunk once adding this sentence would exceed the word budget
        if current_word_count + sentence_word_count <= chunk_size:
            current_chunk.append(sentence)
            current_word_count += sentence_word_count
        else:
            chunks.append(" ".join(current_chunk))
            current_chunk = [sentence]
            current_word_count = sentence_word_count
    if current_chunk:
        chunks.append(" ".join(current_chunk))
    return chunks
# Apply chunking to the entire text
manual_chunks = chunk_text(full_text, chunk_size=500)
# Load your FAISS index
try:
    index = faiss.read_index(faiss_path)
except Exception as e:
    raise RuntimeError(f"Failed to load FAISS index: {e}")
# Load the sentence-embedding model
embedding_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
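# Sanity checks (assumption: the FAISS index at faiss_path was built from these same
# 500-word chunks with this same embedding model). If either check fails, retrieval
# results will not map back to the right text.
if index.d != embedding_model.get_sentence_embedding_dimension():
    raise ValueError(f"Index dimension {index.d} does not match the embedding model.")
if index.ntotal != len(manual_chunks):
    print(f"Warning: index holds {index.ntotal} vectors but {len(manual_chunks)} chunks were produced.")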
# OpenAI API key (read from the OPENAI_API_KEY environment variable rather than hard-coding a secret)
openai.api_key = os.getenv("OPENAI_API_KEY")
# Function to create embeddings
def embed_text(text_list):
    return np.array(embedding_model.encode(text_list), dtype=np.float32)
# Function to retrieve relevant chunks for a user query
def retrieve_chunks(query, k=5):
    query_embedding = embed_text([query])
    try:
        distances, indices = index.search(query_embedding, k)
        print("Distances:", distances)
        print("Indices:", indices)
    except Exception as e:
        raise RuntimeError(f"FAISS search failed: {e}")
    if len(indices[0]) == 0:
        return [], distances, indices
    # Keep only indices that map to a real chunk (FAISS pads missing neighbours with -1)
    valid_indices = [i for i in indices[0] if 0 <= i < len(manual_chunks)]
    if not valid_indices:
        return [], distances, indices
    relevant_chunks = [manual_chunks[i] for i in valid_indices]
    return relevant_chunks, distances, indices
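# Example usage (hypothetical query; actual results depend on the manual and the index):
#   chunks, distances, indices = retrieve_chunks("How do I install a package with apt?", k=3)
# distances and indices come back as (1, k) numpy arrays straight from FAISS.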
# Function to perform RAG: Retrieve chunks and generate a response
def rag_response(query, k=5, max_tokens=150):
    try:
        relevant_chunks, distances, indices = retrieve_chunks(query, k=k)
        if not relevant_chunks:
            return "Sorry, I couldn't find relevant information.", distances, indices
        # Prepend the retrieved context to the user's question
        augmented_input = query + "\n" + "\n".join(relevant_chunks)
        # Generate a response using the OpenAI API (gpt-3.5-turbo is a chat model,
        # so it is called through the ChatCompletion endpoint)
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": augmented_input}],
            max_tokens=max_tokens,
            temperature=0.7
        )
        generated_text = response.choices[0].message["content"].strip()
        return generated_text, distances, indices
    except Exception as e:
        return f"An error occurred: {e}", [], []
# Gradio Interface
def format_output(response, distances, indices):
    # Format output to include distances and indices
    formatted_response = f"Response: {response}\n\nDistances: {distances}\n\nIndices: {indices}"
    return formatted_response
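# Glue helper (name chosen here) joining rag_response and format_output so the single
# Gradio text output receives one formatted string rather than a
# (response, distances, indices) tuple.
def chatbot_interface(query):
    response, distances, indices = rag_response(query)
    return format_output(response, distances, indices)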
iface = gr.Interface(
    fn=chatbot_interface,
    inputs="text",
    outputs="text",
    title="RAG Chatbot with FAISS and GPT-3.5",
    description="Ask a question about the Ubuntu manual.",
    live=True
)
if __name__ == "__main__":
    iface.launch()