import pandas as pd
import torch
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import gradio as gr
import json
import faiss
import numpy as np
import spaces
# Use the GPU if one is available, otherwise fall back to the CPU
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Load the CSV file with embeddings
df = pd.read_csv('RBDx10kstats.csv')
df['embedding'] = df['embedding'].apply(json.loads) # Convert JSON string back to list
# Convert embeddings to a numpy array
embeddings = np.array(df['embedding'].tolist(), dtype='float32')
# Set up the FAISS index
index = faiss.IndexFlatL2(embeddings.shape[1]) # dimension should match the embedding size
index.add(embeddings)
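# Note: IndexFlatL2 ranks by Euclidean distance. If the stored embeddings were
# produced for cosine similarity (common with sentence-transformers models),
# L2-normalize them and use an inner-product index instead, e.g.:
#   faiss.normalize_L2(embeddings)
#   index = faiss.IndexFlatIP(embeddings.shape[1])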
# Load the Sentence Transformer model (it must be the same model that produced the stored embeddings)
sentence_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2', device=device)
# Load the causal language model for response generation (GPT-2, despite the "llama" variable names)
llama_tokenizer = AutoTokenizer.from_pretrained("openai-community/gpt2")
llama_model = AutoModelForCausalLM.from_pretrained("openai-community/gpt2").to(device)
# Load the summarization model
summarizer = pipeline("summarization", model="facebook/bart-large-cnn", device=0 if device == 'cuda' else -1)
# Define the function to find the most relevant document using FAISS
@spaces.GPU(duration=120)
def retrieve_relevant_doc(query):
    # Embed the query and search the FAISS index for the single closest document
    query_embedding = sentence_model.encode(query, convert_to_tensor=False)
    _, indices = index.search(np.array([query_embedding]), k=1)
    best_match_idx = indices[0][0]
    return df.iloc[best_match_idx]['Abstract']
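# search() also returns distances; a (hypothetical) cutoff could reject queries
# with no close match, e.g.:
#   distances, indices = index.search(np.array([query_embedding]), k=1)
#   if distances[0][0] > MAX_DISTANCE:  # MAX_DISTANCE is an assumed tuning value
#       return "No sufficiently relevant document found."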
# Define the function to generate a response
@spaces.GPU(duration=120)
def generate_response(query):
    relevant_doc = retrieve_relevant_doc(query)
    if len(relevant_doc) > 512:  # Summarize long documents so the prompt stays short
        relevant_doc = summarizer(relevant_doc, max_length=150, min_length=50, do_sample=False)[0]['summary_text']
    input_text = f"Document: {relevant_doc}\n\nQuestion: {query}\n\nAnswer:"
    inputs = llama_tokenizer(input_text, return_tensors="pt", truncation=True).to(device)
    # GPT-2 has no pad token, so reuse the EOS token to avoid the warning
    pad_token_id = llama_tokenizer.eos_token_id
    outputs = llama_model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=256,
        pad_token_id=pad_token_id
    )
    # Decode only the newly generated tokens so the prompt is not echoed back
    response = llama_tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
    return response
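# Quick local sanity check (hypothetical query, assuming the CSV loaded above):
#   print(generate_response("What are the key statistics in the RBD dataset?"))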
# Create a Gradio interface
iface = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(lines=2, placeholder="Enter your query here..."),
    outputs="text",
    title="RAG Chatbot",
    description="This chatbot retrieves the most relevant document for your query and generates an answer with a causal language model (GPT-2)."
)
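# On older Gradio versions the queue is not enabled by default; enabling it
# serializes requests so GPU calls do not overlap:
#   iface.queue()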
# Launch the Gradio interface
iface.launch()