import gradio as gr
from sentence_transformers import SentenceTransformer
import faiss
import chromadb

# Load the SentenceTransformer model for vector embeddings
model = SentenceTransformer('all-MiniLM-L6-v2')
# FAQ dataset (this can be expanded)
faq_data = [
    ("What is Hugging Face?", "Hugging Face is a company specializing in AI and machine learning, known for their open-source models and datasets."),
    ("What is AI?", "Artificial Intelligence (AI) is the simulation of human intelligence in machines."),
    # Add more FAQ pairs...
]

corpus = [item[0] for item in faq_data]            # Questions only
answers = {item[0]: item[1] for item in faq_data}  # Map questions to answers

# Encode all questions; normalized embeddings make inner product equal cosine similarity
corpus_embeddings = model.encode(corpus, normalize_embeddings=True)
# Initialize the FAISS index; IndexFlatIP on unit-length vectors scores by cosine similarity,
# so higher scores mean better matches (IndexFlatL2 returns distances, where lower is better)
index = faiss.IndexFlatIP(corpus_embeddings.shape[1])
index.add(corpus_embeddings)
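# Optional sanity check (a sketch, not part of the original app): with normalized
# embeddings and an inner-product index, each stored question should be its own
# nearest neighbour with a similarity of ~1.0. Uncomment to verify locally:
# _scores, _indices = index.search(corpus_embeddings[:1], k=1)
# assert _indices[0][0] == 0 and _scores[0][0] > 0.99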
# Initialize Chroma vector store
client = chromadb.Client()
collection = client.create_collection(name="faq_data")
for i, text in enumerate(corpus):
    collection.add(
        ids=[f"faq_{i}"],  # Unique ID for each document (using the index i)
        documents=[text],
        metadatas=[{"source": f"faq_{i}"}],
        embeddings=[corpus_embeddings[i].tolist()],
    )
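# Note: newer chromadb versions can embed documents themselves via a default
# embedding function when no embeddings are passed; precomputing them here keeps
# FAISS and Chroma searching over identical vectors.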
# Retrieval function using FAISS and Chroma
def retrieve(query):
    # Encode the query as a (1, dim) normalized vector to match the index
    query_embedding = model.encode([query], normalize_embeddings=True)

    # Use FAISS for nearest-neighbour search; scores are cosine similarities
    faiss_scores, faiss_indices = index.search(query_embedding, k=1)
    faiss_top_result_idx = int(faiss_indices[0][0])
    faiss_top_score = float(faiss_scores[0][0])

    # Use Chroma for semantic search as a fallback
    chroma_results = collection.query(query_embeddings=query_embedding.tolist(), n_results=1)
    chroma_docs = chroma_results['documents'][0]

    # Combine results: prefer the FAISS hit when its similarity is high enough
    if faiss_top_score > 0.5:
        return answers[corpus[faiss_top_result_idx]]
    if chroma_docs:
        # Chroma stores the questions, so map the match back to its answer
        return answers[chroma_docs[0]]
    return "Sorry, I didn't understand that. Could you try asking something else?"
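# Quick local test (hypothetical call, not part of the original app): a query
# close to a stored question should clear the 0.5 similarity threshold and take
# the FAISS branch, e.g.
#   print(retrieve("what is hugging face"))  # -> the stored Hugging Face answer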
# Gradio interface to interact with the bot
iface = gr.Interface(
    fn=retrieve,
    inputs="text",
    outputs="text",
    live=True,
    title="RAG AI Bot with OCI AI Skills",
    description="Ask me anything related to Hugging Face, Oracle OCI AI, or general knowledge!",
)

# Launch the Gradio interface
iface.launch()
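# When hosted as a Hugging Face Space, this script typically lives in app.py;
# launch() then starts the web server that exposes retrieve() through the UI.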