File size: 2,450 Bytes
4e0c319
d5b8099
 
2ffecbc
 
4e0c319
2ffecbc
d5b8099
4e0c319
2ffecbc
947ade2
 
2ffecbc
 
947ade2
4e0c319
947ade2
2ffecbc
d5b8099
4e0c319
2ffecbc
 
 
 
 
 
 
 
 
6787b76
2ffecbc
 
 
 
 
 
d5b8099
2ffecbc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
947ade2
 
d5b8099
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import gradio as gr
from sentence_transformers import SentenceTransformer, util
import torch
import faiss
import chromadb

# Sentence embedding model shared by both the FAISS and Chroma stores.
model = SentenceTransformer('all-MiniLM-L6-v2')

# (question, answer) pairs forming the FAQ knowledge base.
faq_data = [
    ("What is Hugging Face?", "Hugging Face is a company specializing in AI and machine learning, known for their open-source models and datasets."),
    ("What is AI?", "Artificial Intelligence (AI) is the simulation of human intelligence in machines.")
    # Add more FAQ pairs...
]

# Questions drive retrieval; the dict maps each question back to its answer.
corpus = [question for question, _ in faq_data]
answers = dict(faq_data)
corpus_embeddings = model.encode(corpus, convert_to_tensor=True)

# FAISS exact L2 index over the question embeddings.
index = faiss.IndexFlatL2(corpus_embeddings.shape[1])
index.add(corpus_embeddings.cpu().numpy())

# Mirror the same questions/embeddings into a Chroma collection.
client = chromadb.Client()
collection = client.create_collection(name="faq_data")
for doc_id, (question, embedding) in enumerate(zip(corpus, corpus_embeddings)):
    collection.add(
        ids=[f"faq_{doc_id}"],  # unique per-document ID derived from position
        documents=[question],
        metadatas=[{"source": f"faq_{doc_id}"}],
        embeddings=[embedding.cpu().numpy()],
    )

# Retrieval function using FAISS and Chroma
def retrieve(query):
    """Answer *query* by nearest-neighbour search over the FAQ corpus.

    First tries FAISS exact L2 search; if the closest question is near
    enough, returns its stored answer. Otherwise falls back to Chroma's
    top semantic match, and finally to a fixed apology message.

    Args:
        query: Free-text user question.

    Returns:
        The answer string for the best-matching FAQ entry, or a fallback
        message when nothing matches.
    """
    query_embedding = model.encode(query, convert_to_tensor=True).cpu().numpy()

    # FAISS expects a 2-D (n_queries, dim) array; encode() on a single
    # string returns a 1-D vector, so reshape before searching.
    distances, indices = index.search(query_embedding.reshape(1, -1), k=1)
    faiss_top_result_idx = int(indices[0][0])
    faiss_top_distance = float(distances[0][0])

    # Chroma semantic search; 'documents' is a list-of-lists (one inner
    # list per query), so take [0][0] — guarding against an empty result.
    chroma_results = collection.query(query_embeddings=[query_embedding.tolist()], n_results=1)
    chroma_docs = chroma_results['documents'][0]
    chroma_top_question = chroma_docs[0] if chroma_docs else None

    # IndexFlatL2 returns L2 distances: SMALLER means closer. The original
    # `score > 0.5` treated the value as a similarity, which inverted the
    # match test. Accept the FAISS hit only when it is close enough.
    # NOTE(review): 1.0 is a heuristic distance cutoff — tune against real
    # queries; embeddings here are not explicitly normalized.
    if faiss_top_distance < 1.0:
        return answers[corpus[faiss_top_result_idx]]

    # Chroma stores the *questions* as documents, so map the matched
    # question back to its answer rather than returning the question text.
    if chroma_top_question is not None:
        return answers.get(chroma_top_question, chroma_top_question)
    return "Sorry, I didn’t understand that. Could you try asking something else?"

# Text-in/text-out Gradio UI wired to the retrieval function; `live=True`
# re-runs retrieval as the user types.
iface = gr.Interface(
    fn=retrieve,
    inputs="text",
    outputs="text",
    live=True,
    title="RAG AI Bot with OCI AI Skills",
    description="Ask me anything related to Hugging Face, Oracle OCI AI, or general knowledge!",
)

# Start the local web server for the interface.
iface.launch()