Yoxas committed
Commit caf5793 · verified · 1 Parent(s): 2712486

Update app.py

Files changed (1)
  1. app.py +11 -35
app.py CHANGED
@@ -1,20 +1,15 @@
-import os
 import pandas as pd
 import torch
 from sentence_transformers import SentenceTransformer, util
 import gradio as gr
 import json
-from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSequenceClassification
-import spaces
-
-os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
-os.environ['TORCH_USE_CUDA_DSA'] = "1"
+from transformers import AutoTokenizer, AutoModelForCausalLM
 
 # Ensure you have GPU support
 device = 'cuda' if torch.cuda.is_available() else 'cpu'
 
 # Load the CSV file with embeddings
-df = pd.read_csv('RBDx10kstats.csv')
+df = pd.read_csv('updated_dataset_with_embeddings.csv')
 df['embedding'] = df['embedding'].apply(json.loads) # Convert JSON string back to list
 
 # Convert embeddings to tensor for efficient retrieval
@@ -23,52 +18,33 @@ embeddings = torch.tensor(df['embedding'].tolist(), device=device)
 # Load the Sentence Transformer model
 model = SentenceTransformer('all-MiniLM-L6-v2', device=device)
 
-# Load the ai model for response generation
-tokenizer = AutoTokenizer.from_pretrained("openai-community/gpt2")
-model_response = AutoModelForCausalLM.from_pretrained("openai-community/gpt2").to(device)
-
-# Load the NLU model for intent detection
-nlu_model = AutoModelForSequenceClassification.from_pretrained("distilbert/distilbert-base-uncased-finetuned-sst-2-english").to(device)
+# Load the LLaMA model for response generation
+llama_tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
+llama_model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct").to(device)
 
 # Define the function to find the most relevant document
-@spaces.GPU(duration=120)
 def retrieve_relevant_doc(query):
     query_embedding = model.encode(query, convert_to_tensor=True, device=device)
     similarities = util.pytorch_cos_sim(query_embedding, embeddings)[0]
     best_match_idx = torch.argmax(similarities).item()
     return df.iloc[best_match_idx]['Abstract']
 
-# Define the function to detect intent
-def detect_intent(query):
-    inputs = tokenizer(query, return_tensors="pt").to(device)
-    outputs = nlu_model(inputs["input_ids"], attention_mask=inputs["attention_mask"])
-    intent = torch.argmax(outputs.logits).item()
-    return intent
-
 # Define the function to generate a response
-@spaces.GPU(duration=120)
 def generate_response(query):
     relevant_doc = retrieve_relevant_doc(query)
-    intent = detect_intent(query)
-    if intent == 0: # Handle intent 0 (e.g., informational query)
-        input_text = f"Document: {relevant_doc}\n\nQuestion: {query}\n\nAnswer:"
-        inputs = tokenizer(input_text, return_tensors="pt").to(device)
-        outputs = model_response.generate(inputs["input_ids"], max_length=150)
-        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    elif intent == 1: # Handle intent 1 (e.g., opinion-based query)
-        # Generate a response based on the detected intent
-        response = "I'm not sure I understand your question. Can you please rephrase?"
-    else:
-        response = "I'm not sure I understand your question. Can you please rephrase?"
+    input_text = f"Document: {relevant_doc}\n\nQuestion: {query}\n\nAnswer:"
+    inputs = llama_tokenizer(input_text, return_tensors="pt").to(device)
+    outputs = llama_model.generate(inputs["input_ids"], max_length=150)
+    response = llama_tokenizer.decode(outputs[0], skip_special_tokens=True)
     return response
 
 # Create a Gradio interface
 iface = gr.Interface(
     fn=generate_response,
-    inputs=gr.Textbox(lines=2, placeholder="Enter your query here..."),
+    inputs=gr.inputs.Textbox(lines=2, placeholder="Enter your query here..."),
     outputs="text",
     title="RAG Chatbot",
-    description="This chatbot retrieves relevant documents based on your query and generates responses using ai models."
+    description="This chatbot retrieves relevant documents based on your query and generates responses using LLaMA."
 )
 
 # Launch the Gradio interface
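
For reference, app.py expects updated_dataset_with_embeddings.csv to hold an 'Abstract' column and an 'embedding' column containing one JSON-encoded vector per row (recovered above with json.loads). A minimal sketch, not part of this commit, of how such a file could be produced with the same all-MiniLM-L6-v2 model; the source filename dataset.csv is hypothetical, the column names and output filename mirror what app.py reads back:

# Sketch only: build the embeddings CSV that app.py loads.
import json

import pandas as pd
from sentence_transformers import SentenceTransformer

model = SentenceTransformer('all-MiniLM-L6-v2')

# Hypothetical source file; it only needs an 'Abstract' column.
df = pd.read_csv('dataset.csv')

# Encode every abstract; returns one vector per row.
vectors = model.encode(df['Abstract'].tolist(), convert_to_tensor=False)

# Store each vector as a JSON string so app.py can recover it with json.loads.
df['embedding'] = [json.dumps(vec.tolist()) for vec in vectors]
df.to_csv('updated_dataset_with_embeddings.csv', index=False)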