Spaces:
Runtime error
Runtime error
File size: 2,079 Bytes
e7abd03 716f829 e7abd03 caf5793 2ef3675 236d6fa 716f829 c30444f c2371ad e7abd03 716f829 e56dc84 d0ab9b7 e7abd03 d1e3096 10c14f8 e7abd03 716f829 2ef3675 716f829 d0ab9b7 716f829 b71f887 d0ab9b7 2ef3675 716f829 caf5793 d1e3096 d0ab9b7 e7abd03 716f829 e0b330b 6f729e6 e7abd03 62cb2dc 6f729e6 716f829 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
import pandas as pd
import torch
from sentence_transformers import SentenceTransformer, util
import gradio as gr
import json
from transformers import AutoTokenizer, AutoModelForCausalLM
import spaces
# Run on the GPU when torch reports one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Corpus table. Its 'embedding' column is stored as JSON text, so every cell
# is decoded back into a Python list before use.
df = pd.read_csv("RBDx10kstats.csv")
df["embedding"] = df["embedding"].map(json.loads)

# Stack all document embeddings into one tensor on the target device so a
# query can be compared against the whole corpus in a single call.
embeddings = torch.tensor(df["embedding"].to_list(), device=device)

# Sentence Transformer used to embed incoming queries.
model = SentenceTransformer("all-MiniLM-L6-v2", device=device)

# Causal language model (and its tokenizer) used to generate the response.
GENERATOR_CHECKPOINT = "openai-community/gpt2-large"
ai_tokenizer = AutoTokenizer.from_pretrained(GENERATOR_CHECKPOINT)
ai_model = AutoModelForCausalLM.from_pretrained(GENERATOR_CHECKPOINT)
ai_model = ai_model.to(device)
# Retrieval step: pick the stored document closest to the query.
@spaces.GPU(duration=120)
def retrieve_relevant_doc(query):
    """Return the 'Abstract' of the document most similar to *query*.

    The query is embedded with the module-level sentence-transformer
    ``model`` and compared by cosine similarity against every row of the
    precomputed ``embeddings`` tensor; the row with the highest score wins.

    Args:
        query: Text to search for.

    Returns:
        The value of the 'Abstract' column for the best-matching row of ``df``.
    """
    query_vec = model.encode(query, convert_to_tensor=True, device=device)
    # pytorch_cos_sim returns a (queries x documents) matrix; there is a
    # single query here, so only its first row is needed.
    scores = util.pytorch_cos_sim(query_vec, embeddings)[0]
    top_row = df.iloc[torch.argmax(scores).item()]
    return top_row['Abstract']
# Define the function to generate a response
def _format_prompt(document, query):
    """Build the prompt text handed to the language model."""
    return f"Document: {document}\n\nQuestion: {query}\n\nAnswer:"


@spaces.GPU(duration=120)
def generate_response(query):
    """Generate an answer to *query* grounded in the best-matching document.

    The most relevant abstract is retrieved, wrapped in a
    "Document / Question / Answer:" prompt and continued by the causal
    language model.

    Args:
        query: The user's question.

    Returns:
        The decoded model output as a string (the prompt followed by the
        generated continuation), with special tokens removed.
    """
    relevant_doc = retrieve_relevant_doc(query)

    # `max_length` counts prompt tokens too, and the model cannot attend past
    # its position limit. Cap the total at whichever is smaller and always
    # reserve some room so a long abstract cannot crowd out the answer.
    context_window = getattr(ai_model.config, "max_position_embeddings", 1024)
    max_total_tokens = min(1024, context_window)
    reserved_for_answer = 64
    prompt_budget = max_total_tokens - reserved_for_answer

    inputs = ai_tokenizer(_format_prompt(relevant_doc, query), return_tensors="pt")
    overflow = inputs["input_ids"].shape[1] - prompt_budget
    if overflow > 0:
        # Shorten the document rather than the whole prompt so the question
        # and the trailing "Answer:" cue survive.
        doc_ids = ai_tokenizer(str(relevant_doc))["input_ids"]
        kept_ids = doc_ids[: max(len(doc_ids) - overflow, 0)]
        shortened_doc = ai_tokenizer.decode(kept_ids)
        # `truncation` is a last-resort guard, e.g. when the query alone is
        # longer than the budget.
        inputs = ai_tokenizer(
            _format_prompt(shortened_doc, query),
            return_tensors="pt",
            truncation=True,
            max_length=prompt_budget,
        )
    inputs = inputs.to(device)

    outputs = ai_model.generate(
        inputs["input_ids"],
        # Passed explicitly: GPT-2 has no pad token, so generate() would
        # otherwise have to guess the mask and fall back to EOS with a warning.
        attention_mask=inputs["attention_mask"],
        pad_token_id=ai_tokenizer.eos_token_id,
        max_length=max_total_tokens,
    )
    return ai_tokenizer.decode(outputs[0], skip_special_tokens=True)
# Create a Gradio interface: one two-line text box in, plain text out,
# with generate_response handling each submission.
iface = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(lines=2, placeholder="Enter your query here..."),
    outputs="text",
    title="RAG Chatbot",
    description="This chatbot retrieves relevant documents based on your query and generates responses using ai models.",
)
# Launch the Gradio interface
iface.launch()