Yoxas committed on
Commit 6f729e6 · verified · 1 Parent(s): d93056d

Update app.py

Files changed (1):
  1. app.py +63 -64
app.py CHANGED
@@ -16,75 +16,74 @@ def safe_json_loads(x):
         return np.array(json.loads(x))
     except json.JSONDecodeError as e:
         print(f"Error decoding JSON: {e}")
-        return np.zeros(128)  # Return a default array of zeros
+        return np.array([])  # Return an empty array or handle it as appropriate
 
 # Apply the safe_json_loads function to the embedding column
 data['embedding'] = data['embedding'].apply(safe_json_loads)
 
 # Filter out any rows with empty embeddings
-data = data[data['embedding'].apply(lambda x: x is not None and len(x) > 0)]
+data = data[data['embedding'].apply(lambda x: x.size > 0)]
 
 # Check if the DataFrame is empty after filtering
 if data.empty:
-    print("No valid embeddings found in the data. Using default values.")
-else:
-    # Initialize FAISS index
-    dimension = len(data['embedding'].iloc[0])
-    res = faiss.StandardGpuResources()  # use a single GPU
-
-    # Check available GPU devices
-    num_gpus = faiss.get_num_gpus()
-    if num_gpus > 0:
-        gpu_index = faiss.IndexFlatL2(dimension)
-        gpu_index = faiss.index_cpu_to_gpu(res, 0, gpu_index)  # move to GPU
-    else:
-        raise RuntimeError("No GPU devices available.")
-
-    gpu_index.add(np.stack(data['embedding'].values))
-
-    # Check if GPU is available
-    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-
-    # Load QA model
-    qa_model = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad", device=0 if torch.cuda.is_available() else -1)
-
-    # Load BERT model and tokenizer
-    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
-    model = BertModel.from_pretrained('bert-base-uncased').to(device)
-
-    # Function to embed the question using BERT
-    def embed_question(question, model, tokenizer):
-        inputs = tokenizer(question, return_tensors='pt').to(device)
-        with torch.no_grad():
-            outputs = model(**inputs)
-        return outputs.last_hidden_state.mean(dim=1).cpu().numpy()
-
-    # Function to retrieve the relevant document and generate a response
-    @spaces.GPU(duration=120)
-    def retrieve_and_generate(question):
-        # Embed the question
-        question_embedding = embed_question(question, model, tokenizer)
-
-        # Search in FAISS index
-        _, indices = gpu_index.search(question_embedding, k=1)
-
-        # Retrieve the most relevant document
-        relevant_doc = data.iloc[indices[0][0]]
-
-        # Use the QA model to generate the answer
-        context = relevant_doc['Abstract']
-        response = qa_model(question=question, context=context)
-
-        return response['answer']
-
-    # Create a Gradio interface
-    interface = gr.Interface(
-        fn=retrieve_and_generate,
-        inputs=gr.Textbox(lines=2, placeholder="Ask a question about the documents..."),
-        outputs="text",
-        title="RAG Chatbot",
-        description="Ask questions about the documents in the CSV file."
-    )
-
-    # Launch the Gradio app
-    interface.launch()
+    raise RuntimeError("No valid embeddings found in the data.")
+
+# Initialize FAISS index
+dimension = len(data['embedding'].iloc[0])
+gpu_available = torch.cuda.is_available()
+
+# Initialize FAISS resources and index
+res = faiss.StandardGpuResources() if gpu_available else None
+index = faiss.IndexFlatL2(dimension)
+
+if gpu_available:
+    index = faiss.index_cpu_to_gpu(res, 0, index)  # move to GPU
+
+index.add(np.stack(data['embedding'].values))
+
+# Set the device
+device = torch.device('cuda' if gpu_available else 'cpu')
+
+# Load QA model
+qa_model = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad", device=0 if gpu_available else -1)
+
+# Load BERT model and tokenizer
+tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+model = BertModel.from_pretrained('bert-base-uncased').to(device)
+
+# Function to embed the question using BERT
+def embed_question(question, model, tokenizer):
+    inputs = tokenizer(question, return_tensors='pt').to(device)
+    with torch.no_grad():
+        outputs = model(**inputs)
+    return outputs.last_hidden_state.mean(dim=1).cpu().numpy()
+
+# Function to retrieve the relevant document and generate a response
+@spaces.GPU(duration=120)
+def retrieve_and_generate(question):
+    # Embed the question
+    question_embedding = embed_question(question, model, tokenizer)
+
+    # Search in FAISS index
+    _, indices = index.search(question_embedding, k=1)
+
+    # Retrieve the most relevant document
+    relevant_doc = data.iloc[indices[0][0]]
+
+    # Use the QA model to generate the answer
+    context = relevant_doc['Abstract']
+    response = qa_model(question=question, context=context)
+
+    return response['answer']
+
+# Create a Gradio interface
+interface = gr.Interface(
+    fn=retrieve_and_generate,
+    inputs=gr.Textbox(lines=2, placeholder="Ask a question about the documents..."),
+    outputs="text",
+    title="RAG Chatbot",
+    description="Ask questions about the documents in the CSV file."
+)
+
+# Launch the Gradio app
+interface.launch()
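
The switch from a fake zero vector to an empty array is what lets the filter shrink to x.size > 0: every row is now guaranteed to hold an ndarray, and malformed rows are dropped instead of being indexed. A minimal sketch of that path, using hypothetical toy data rather than the Space's real CSV:

    import json
    import numpy as np
    import pandas as pd

    def safe_json_loads(x):
        try:
            return np.array(json.loads(x))
        except json.JSONDecodeError as e:
            print(f"Error decoding JSON: {e}")
            return np.array([])  # empty marker, never a fake 128-dim vector

    # toy data (illustrative only): one malformed embedding string
    data = pd.DataFrame({'embedding': ['[1.0, 2.0]', 'not json', '[3.0, 4.0]']})
    data['embedding'] = data['embedding'].apply(safe_json_loads)

    # x.size > 0 is safe because safe_json_loads always returns an ndarray
    data = data[data['embedding'].apply(lambda x: x.size > 0)]
    print(len(data))  # 2 -- the malformed row is filtered out, not zero-filled

Under the old code, the malformed row would have survived as np.zeros(128) and could be returned as a nearest neighbour; now it never reaches the index.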
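The FAISS change follows the same build-on-CPU, move-to-GPU-if-possible pattern, keyed on torch.cuda.is_available() instead of raising when no GPU is present. A standalone sketch of that pattern with random vectors (the sizes here are illustrative, not the app's):

    import numpy as np
    import faiss
    import torch

    embeddings = np.random.rand(100, 768).astype('float32')  # FAISS expects float32

    index = faiss.IndexFlatL2(embeddings.shape[1])  # exact L2 index, built on CPU
    if torch.cuda.is_available():
        res = faiss.StandardGpuResources()             # only in faiss-gpu builds
        index = faiss.index_cpu_to_gpu(res, 0, index)  # move to GPU 0

    index.add(embeddings)
    distances, indices = index.search(embeddings[:1], 1)
    print(indices)  # [[0]] -- row 0's nearest neighbour is itself

IndexFlatL2 is brute-force on both devices, so the fallback changes speed, not retrieval quality.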