Spaces:

Yoxas
/

testchatbot

Runtime error

App Files Community

Yoxas committed on Jun 2, 2024

Commit

fcb1289

verified ·

1 Parent(s): a63f6c6

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -12

app.py CHANGED Viewed

@@ -3,12 +3,9 @@ import torch
 from sentence_transformers import SentenceTransformer, util
 import gradio as gr
 import json
-from transformers import AutoTokenizer, AutoModelForQuestionAnswering
 import spaces
-# Ensure you have GPU support
-device = 'cuda' if torch.cuda.is_available() else 'cpu'
 # Load the CSV file with embeddings
 df = pd.read_csv('RBDx10kstats.csv')
 df['embedding'] = df['embedding'].apply(json.loads)  # Convert JSON string back to list
@@ -19,9 +16,12 @@ embeddings = torch.tensor(df['embedding'].tolist(), device=device)
 # Load the Sentence Transformer model
 model = SentenceTransformer('all-MiniLM-L6-v2', device=device)
-# Load the LLaMA model for response generation
-llama_tokenizer = AutoTokenizer.from_pretrained("distilbert/distilbert-base-uncased-distilled-squad")
-llama_model = AutoModelForQuestionAnswering.from_pretrained("distilbert/distilbert-base-uncased-distilled-squad").to(device)
 # Define the function to find the most relevant document
 @spaces.GPU(duration=120)
@@ -31,14 +31,29 @@ def retrieve_relevant_doc(query):
     best_match_idx = torch.argmax(similarities).item()
     return df.iloc[best_match_idx]['Abstract']
 # Define the function to generate a response
 @spaces.GPU(duration=120)
 def generate_response(query):
     relevant_doc = retrieve_relevant_doc(query)
-    input_text = f"Document: {relevant_doc}\n\nQuestion: {query}\n\nAnswer:"
-    inputs = llama_tokenizer(input_text, return_tensors="pt").to(device)
-    outputs = llama_model.generate(inputs["input_ids"], max_length=500)
-    response = llama_tokenizer.decode(outputs[0], skip_special_tokens=True)
     return response
 # Create a Gradio interface
@@ -47,7 +62,7 @@ iface = gr.Interface(
     inputs=gr.Textbox(lines=2, placeholder="Enter your query here..."),
     outputs="text",
     title="RAG Chatbot",
-    description="This chatbot retrieves relevant documents based on your query and generates responses using LLaMA."
 )
 # Launch the Gradio interface

 from sentence_transformers import SentenceTransformer, util
 import gradio as gr
 import json
+from transformers import AutoTokenizer, AutoModelForQuestionAnswering, AutoModelForSequenceClassification
 import spaces
 # Load the CSV file with embeddings
 df = pd.read_csv('RBDx10kstats.csv')
 df['embedding'] = df['embedding'].apply(json.loads)  # Convert JSON string back to list
 # Load the Sentence Transformer model
 model = SentenceTransformer('all-MiniLM-L6-v2', device=device)
+# Load the ai model for response generation
+tokenizer = AutoTokenizer.from_pretrained("distilbert/distilbert-base-uncased-distilled-squad")
+model_response = AutoModelForQuestionAnswering.from_pretrained("distilbert/distilbert-base-uncased-distilled-squad").to(device)
+# Load the NLU model for intent detection
+nlu_model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased-finetuned-sst-2-english").to(device)
 # Define the function to find the most relevant document
 @spaces.GPU(duration=120)
     best_match_idx = torch.argmax(similarities).item()
     return df.iloc[best_match_idx]['Abstract']
+# Define the function to detect intent
+@spaces.GPU(duration=120)
+def detect_intent(query):
+    inputs = tokenizer(query, return_tensors="pt").to(device)
+    outputs = nlu_model(inputs["input_ids"], attention_mask=inputs["attention_mask"])
+    intent = torch.argmax(outputs.logits).item()
+    return intent
 # Define the function to generate a response
 @spaces.GPU(duration=120)
 def generate_response(query):
     relevant_doc = retrieve_relevant_doc(query)
+    intent = detect_intent(query)
+    if intent == 0:  # Handle intent 0 (e.g., informational query)
+        input_text = f"Document: {relevant_doc}\n\nQuestion: {query}\n\nAnswer:"
+        inputs = tokenizer(input_text, return_tensors="pt").to(device)
+        outputs = model_response.generate(inputs["input_ids"], max_length=500)
+        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    elif intent == 1:  # Handle intent 1 (e.g., opinion-based query)
+        # Generate a response based on the detected intent
+        response = "I'm not sure I understand your question. Can you please rephrase?"
+    else:
+        response = "I'm not sure I understand your question. Can you please rephrase?"
     return response
 # Create a Gradio interface
     inputs=gr.Textbox(lines=2, placeholder="Enter your query here..."),
     outputs="text",
     title="RAG Chatbot",
+    description="This chatbot retrieves relevant documents based on your query and generates responses using ai models."
 )
 # Launch the Gradio interface