File size: 2,450 Bytes
4e0c319
d5b8099
 
2ffecbc
 
4e0c319
2ffecbc
d5b8099
4e0c319
2ffecbc
947ade2
 
2ffecbc
 
947ade2
4e0c319
947ade2
2ffecbc
d5b8099
4e0c319
2ffecbc
 
 
 
 
 
 
 
 
6787b76
2ffecbc
 
 
 
 
 
d5b8099
2ffecbc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
947ade2
 
d5b8099
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import gradio as gr
from sentence_transformers import SentenceTransformer, util
import torch
import faiss
import chromadb

# Sentence embedding model shared by both the FAISS and Chroma stores.
model = SentenceTransformer('all-MiniLM-L6-v2')

# (question, answer) pairs forming the FAQ knowledge base.
faq_data = [
    ("What is Hugging Face?", "Hugging Face is a company specializing in AI and machine learning, known for their open-source models and datasets."),
    ("What is AI?", "Artificial Intelligence (AI) is the simulation of human intelligence in machines.")
    # Add more FAQ pairs...
]

# Questions drive retrieval; the dict maps each question back to its answer.
corpus = [question for question, _ in faq_data]
answers = dict(faq_data)
corpus_embeddings = model.encode(corpus, convert_to_tensor=True)

# FAISS exact L2 index over the question embeddings.
index = faiss.IndexFlatL2(corpus_embeddings.shape[1])
index.add(corpus_embeddings.cpu().numpy())

# Mirror the same questions/embeddings into a Chroma collection.
client = chromadb.Client()
collection = client.create_collection(name="faq_data")
for doc_id, (question, embedding) in enumerate(zip(corpus, corpus_embeddings)):
    collection.add(
        ids=[f"faq_{doc_id}"],  # unique per-document ID derived from position
        documents=[question],
        metadatas=[{"source": f"faq_{doc_id}"}],
        embeddings=[embedding.cpu().numpy()],
    )

# Retrieval function using FAISS and Chroma
def retrieve(query):
    """Answer *query* by nearest-neighbour search over the FAQ corpus.

    First tries FAISS exact L2 search; if the closest question is near
    enough, returns its stored answer. Otherwise falls back to Chroma's
    top semantic match, and finally to a fixed apology message.

    Args:
        query: Free-text user question.

    Returns:
        The answer string for the best-matching FAQ entry, or a fallback
        message when nothing matches.
    """
    query_embedding = model.encode(query, convert_to_tensor=True).cpu().numpy()

    # FAISS expects a 2-D (n_queries, dim) array; encode() on a single
    # string returns a 1-D vector, so reshape before searching.
    distances, indices = index.search(query_embedding.reshape(1, -1), k=1)
    faiss_top_result_idx = int(indices[0][0])
    faiss_top_distance = float(distances[0][0])

    # Chroma semantic search; 'documents' is a list-of-lists (one inner
    # list per query), so take [0][0] — guarding against an empty result.
    chroma_results = collection.query(query_embeddings=[query_embedding.tolist()], n_results=1)
    chroma_docs = chroma_results['documents'][0]
    chroma_top_question = chroma_docs[0] if chroma_docs else None

    # IndexFlatL2 returns L2 distances: SMALLER means closer. The original
    # `score > 0.5` treated the value as a similarity, which inverted the
    # match test. Accept the FAISS hit only when it is close enough.
    # NOTE(review): 1.0 is a heuristic distance cutoff — tune against real
    # queries; embeddings here are not explicitly normalized.
    if faiss_top_distance < 1.0:
        return answers[corpus[faiss_top_result_idx]]

    # Chroma stores the *questions* as documents, so map the matched
    # question back to its answer rather than returning the question text.
    if chroma_top_question is not None:
        return answers.get(chroma_top_question, chroma_top_question)
    return "Sorry, I didn’t understand that. Could you try asking something else?"

# Text-in/text-out Gradio UI wired to the retrieval function; `live=True`
# re-runs retrieval as the user types.
iface = gr.Interface(
    fn=retrieve,
    inputs="text",
    outputs="text",
    live=True,
    title="RAG AI Bot with OCI AI Skills",
    description="Ask me anything related to Hugging Face, Oracle OCI AI, or general knowledge!",
)

# Start the local web server for the interface.
iface.launch()