import gradio as gr
from sentence_transformers import SentenceTransformer, util
import torch

# Load the sentence-transformer embedding model
model = SentenceTransformer('all-MiniLM-L6-v2')
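# (all-MiniLM-L6-v2 is a small, fast model that maps text to 384-dimensional embeddings.)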

# Expanded FAQ dataset with more diverse responses
faq_data = [
    ("What is Hugging Face?", "Hugging Face is a company specializing in AI and machine learning, known for their open-source models and datasets."),
    ("How does Hugging Face help in AI?", "Hugging Face provides tools, libraries, and pre-trained models to make machine learning easier and more accessible."),
    ("What is machine learning?", "Machine learning is a subset of AI that enables computers to learn from data and improve over time without being explicitly programmed."),
    ("What is a transformer model?", "A transformer model is a deep learning model that uses attention mechanisms to process and generate sequences of data, such as text or speech."),
    ("Can I use Hugging Face models in production?", "Yes, Hugging Face provides tools and frameworks like `transformers` for deploying models into production environments."),
    ("What is RAG?", "Retrieval-Augmented Generation (RAG) combines pre-trained models with retrieval systems to answer questions using both the knowledge from the model and external documents."),
    ("What is AI?", "Artificial Intelligence (AI) is the simulation of human intelligence in machines, enabling them to perform tasks that typically require human cognition."),
    ("Tell me a joke", "Why don't skeletons fight each other? They don't have the guts!"),
    ("What is the capital of France?", "The capital of France is Paris."),
    ("How can I contact support?", "You can contact support via our website or email for assistance."),
    ("What's the weather like today?", "Sorry, I don't have access to real-time data, but I suggest checking a weather app for the latest updates."),
]

# Encode the FAQ dataset
corpus = [item[0] for item in faq_data]            # Questions only
answers = {item[0]: item[1] for item in faq_data}  # Map each question to its answer
corpus_embeddings = model.encode(corpus, convert_to_tensor=True)
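# corpus_embeddings is a (num_questions x 384) tensor computed once at startup,
# so each incoming query only costs one encode plus a similarity pass.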

# Retrieve the most relevant FAQ answer for a user query
def retrieve(query):
    query_embedding = model.encode(query, convert_to_tensor=True)
    cosine_scores = util.cos_sim(query_embedding, corpus_embeddings)[0]
    top_result_idx = int(torch.argmax(cosine_scores))
    top_score = cosine_scores[top_result_idx].item()
    # Fall back when even the best match is a weak one
    if top_score < 0.5:  # similarity threshold; adjust as needed
        return "I didn't understand that. Could you try asking something else?"
    # Return the answer paired with the best-matching question
    return answers[corpus[top_result_idx]]
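
# Optional sketch, not wired into the Gradio app below: sentence-transformers
# also provides util.semantic_search, which returns the top-k matches rather
# than only the single best one. The helper name retrieve_top_k is our own.
def retrieve_top_k(query, k=3):
    query_embedding = model.encode(query, convert_to_tensor=True)
    hits = util.semantic_search(query_embedding, corpus_embeddings, top_k=k)[0]
    # Each hit is a dict with 'corpus_id' (index into corpus) and 'score'
    return [(corpus[hit["corpus_id"]], hit["score"]) for hit in hits]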

# Create the Gradio interface
iface = gr.Interface(
    fn=retrieve,
    inputs="text",
    outputs="text",
    live=True,
    title="RAG AI Bot",
    description="Ask me anything related to Hugging Face or general knowledge!",
)

# Launch the Gradio interface
iface.launch()
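# Note: on Hugging Face Spaces the app is served automatically; when running
# locally, launch(share=True) creates a temporary public URL for sharing.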