Yoxas committed on
Commit 62b261c · verified · 1 Parent(s): c2371ad

Update app.py

Files changed (1): app.py (+27, -18)
app.py CHANGED
@@ -1,10 +1,12 @@
import pandas as pd
import torch
from sentence_transformers import SentenceTransformer, util
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import gradio as gr
import json
- from transformers import AutoTokenizer, AutoModelForCausalLM
+ import faiss
import spaces
+
# Ensure you have GPU support
device = 'cuda' if torch.cuda.is_available() else 'cpu'

@@ -12,41 +14,48 @@ device = 'cuda' if torch.cuda.is_available() else 'cpu'
df = pd.read_csv('RBDx10kstats.csv')
df['embedding'] = df['embedding'].apply(json.loads) # Convert JSON string back to list

- # Convert embeddings to tensor for efficient retrieval
- embeddings = torch.tensor(df['embedding'].tolist(), device=device)
+ # Convert embeddings to a numpy array
+ embeddings = np.array(df['embedding'].tolist(), dtype='float32')
+
+ # Setup FAISS
+ index = faiss.IndexFlatL2(embeddings.shape[1]) # dimension should match the embedding size
+ index.add(embeddings)

# Load the Sentence Transformer model
- model = SentenceTransformer('all-MiniLM-L6-v2', device=device)
+ sentence_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2', device=device)
+
+ # Load the LLaMA model for response generation
+ llama_tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
+ llama_model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf").to(device)

- # Load the ai model for response generation
- ai_tokenizer = AutoTokenizer.from_pretrained("openai-community/gpt2-large")
- ai_model = AutoModelForCausalLM.from_pretrained("openai-community/gpt2-large").to(device)
+ # Load the summarization model
+ summarizer = pipeline("summarization", model="facebook/bart-large-cnn", device=0 if device == 'cuda' else -1)

- # Define the function to find the most relevant document
- @spaces.GPU(duration=120)
+ # Define the function to find the most relevant document using FAISS
def retrieve_relevant_doc(query):
-     query_embedding = model.encode(query, convert_to_tensor=True, device=device)
-     similarities = util.pytorch_cos_sim(query_embedding, embeddings)[0]
-     best_match_idx = torch.argmax(similarities).item()
+     query_embedding = sentence_model.encode(query, convert_to_tensor=False)
+     _, indices = index.search(np.array([query_embedding]), k=1)
+     best_match_idx = indices[0][0]
    return df.iloc[best_match_idx]['Abstract']

# Define the function to generate a response
- @spaces.GPU(duration=120)
def generate_response(query):
    relevant_doc = retrieve_relevant_doc(query)
+     if len(relevant_doc) > 512: # Truncate long documents
+         relevant_doc = summarizer(relevant_doc, max_length=150, min_length=50, do_sample=False)[0]['summary_text']
    input_text = f"Document: {relevant_doc}\n\nQuestion: {query}\n\nAnswer:"
-     inputs = ai_tokenizer(input_text, return_tensors="pt").to(device)
-     outputs = ai_model.generate(inputs["input_ids"], max_length=1024)
-     response = ai_tokenizer.decode(outputs[0], skip_special_tokens=True)
+     inputs = llama_tokenizer(input_text, return_tensors="pt").to(device)
+     outputs = llama_model.generate(inputs["input_ids"], max_length=150)
+     response = llama_tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response

# Create a Gradio interface
iface = gr.Interface(
    fn=generate_response,
-     inputs=gr.Textbox(lines=2, placeholder="Enter your query here..."),
+     inputs=gr.inputs.Textbox(lines=2, placeholder="Enter your query here..."),
    outputs="text",
    title="RAG Chatbot",
-     description="This chatbot retrieves relevant documents based on your query and generates responses using ai models."
+     description="This chatbot retrieves relevant documents based on your query and generates responses using LLaMA."
)

# Launch the Gradio interface
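
The main change above swaps the tensor-based cosine-similarity lookup for a FAISS index over the precomputed embeddings. Below is a minimal, self-contained sketch of that retrieval path; the toy abstracts and query are illustrative stand-ins for the RBDx10kstats.csv columns, and the sketch adds import numpy as np, which the committed code relies on (np.array) but the diff never introduces.

import numpy as np
import faiss
from sentence_transformers import SentenceTransformer

# Encoder matching the app; all-MiniLM-L6-v2 produces 384-dimensional embeddings
sentence_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# Illustrative stand-ins for the 'Abstract' column of RBDx10kstats.csv
abstracts = [
    "REM sleep behaviour disorder patients show reduced muscle atonia.",
    "An unrelated abstract about crop yields.",
]
embeddings = sentence_model.encode(abstracts).astype('float32')  # FAISS expects float32

# Exact L2 index, as in the commit (IndexFlatL2)
index = faiss.IndexFlatL2(embeddings.shape[1])
index.add(embeddings)

# Same query flow as the new retrieve_relevant_doc
query_embedding = sentence_model.encode("What happens to muscle tone in RBD?", convert_to_tensor=False)
_, indices = index.search(np.array([query_embedding], dtype='float32'), k=1)
print(abstracts[indices[0][0]])

Note that IndexFlatL2 ranks by Euclidean distance; it only reproduces the previous cosine-similarity ranking when the stored and query embeddings are L2-normalised (or when an inner-product index is used on normalised vectors).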
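
On the generation side, generate_response now summarises documents longer than 512 characters with facebook/bart-large-cnn before prompting Llama-2. One caveat: max_length=150 in generate() counts the prompt tokens as well as the answer, so a prompt that already contains a 150-token summary leaves little or no room for new tokens; max_new_tokens bounds only the continuation. The sketch below mirrors that step with distilgpt2 as a stand-in checkpoint (an assumption, since meta-llama/Llama-2-7b-chat-hf is gated) and an invented document string.

from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# Stand-in causal LM so the sketch runs without gated Llama-2 access (assumption, not the app's model)
tokenizer = AutoTokenizer.from_pretrained("distilgpt2")
model = AutoModelForCausalLM.from_pretrained("distilgpt2")

summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

# Invented abstract, long enough to trigger the 512-character truncation branch
relevant_doc = "RBD patients in the cohort showed reduced REM atonia and frequent dream enactment. " * 10
if len(relevant_doc) > 512:  # character-level check, as in the commit
    relevant_doc = summarizer(relevant_doc, max_length=150, min_length=50, do_sample=False)[0]['summary_text']

input_text = f"Document: {relevant_doc}\n\nQuestion: What symptom was observed?\n\nAnswer:"
inputs = tokenizer(input_text, return_tensors="pt")
# max_new_tokens caps only the generated answer; max_length=150 would also count the prompt
outputs = model.generate(inputs["input_ids"], max_new_tokens=150)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Separately, gr.inputs.Textbox is Gradio's legacy namespace (deprecated in 3.x, removed in 4.x), so the bare gr.Textbox(...) spelling from the previous revision is the one current Gradio versions accept.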