Yoxas committed
Commit 4b169ad · verified · Parent: 4b71595

Update app.py
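In short, judging from the diff below: hard failure paths become soft fallbacks. safe_json_loads returns a fixed-size zero vector instead of an empty array on a JSON decode error, an empty DataFrame after filtering prints a warning instead of raising, and the FAISS index, models, and Gradio interface are now built only in the else branch, when valid embeddings exist.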

Files changed (1): app.py +55 -55
app.py CHANGED
@@ -16,75 +16,75 @@ def safe_json_loads(x):
         return np.array(json.loads(x))
     except json.JSONDecodeError as e:
         print(f"Error decoding JSON: {e}")
-        return np.array([])  # Return an empty array or handle it as appropriate
+        return np.zeros(128)  # Return a default array of zeros
 
 # Apply the safe_json_loads function to the embedding column
 data['embedding'] = data['embedding'].apply(safe_json_loads)
 
 # Filter out any rows with empty embeddings
-data = data[data['embedding'].apply(lambda x: x.size > 0)]
+data = data[data['embedding'].apply(lambda x: x is not None and len(x) > 0)]
 
 # Check if the DataFrame is empty after filtering
 if data.empty:
-    raise RuntimeError("No valid embeddings found in the data.")
-
-# Initialize FAISS index
-dimension = len(data['embedding'].iloc[0])
-res = faiss.StandardGpuResources()  # use a single GPU
-
-# Check available GPU devices
-num_gpus = faiss.get_num_gpus()
-if num_gpus > 0:
-    gpu_index = faiss.IndexFlatL2(dimension)
-    gpu_index = faiss.index_cpu_to_gpu(res, 0, gpu_index)  # move to GPU
+    print("No valid embeddings found in the data. Using default values.")
 else:
-    raise RuntimeError("No GPU devices available.")
+    # Initialize FAISS index
+    dimension = len(data['embedding'].iloc[0])
+    res = faiss.StandardGpuResources()  # use a single GPU
+
+    # Check available GPU devices
+    num_gpus = faiss.get_num_gpus()
+    if num_gpus > 0:
+        gpu_index = faiss.IndexFlatL2(dimension)
+        gpu_index = faiss.index_cpu_to_gpu(res, 0, gpu_index)  # move to GPU
+    else:
+        raise RuntimeError("No GPU devices available.")
 
-gpu_index.add(np.stack(data['embedding'].values))
+    gpu_index.add(np.stack(data['embedding'].values))
 
-# Check if GPU is available
-device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+    # Check if GPU is available
+    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 
-# Load QA model
-qa_model = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad", device=0 if torch.cuda.is_available() else -1)
+    # Load QA model
+    qa_model = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad", device=0 if torch.cuda.is_available() else -1)
 
-# Load BERT model and tokenizer
-tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
-model = BertModel.from_pretrained('bert-base-uncased').to(device)
+    # Load BERT model and tokenizer
+    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+    model = BertModel.from_pretrained('bert-base-uncased').to(device)
 
-# Function to embed the question using BERT
-def embed_question(question, model, tokenizer):
-    inputs = tokenizer(question, return_tensors='pt').to(device)
-    with torch.no_grad():
-        outputs = model(**inputs)
-    return outputs.last_hidden_state.mean(dim=1).cpu().numpy()
+    # Function to embed the question using BERT
+    def embed_question(question, model, tokenizer):
+        inputs = tokenizer(question, return_tensors='pt').to(device)
+        with torch.no_grad():
+            outputs = model(**inputs)
+        return outputs.last_hidden_state.mean(dim=1).cpu().numpy()
 
-# Function to retrieve the relevant document and generate a response
-@spaces.GPU(duration=120)
-def retrieve_and_generate(question):
-    # Embed the question
-    question_embedding = embed_question(question, model, tokenizer)
-
-    # Search in FAISS index
-    _, indices = gpu_index.search(question_embedding, k=1)
-
-    # Retrieve the most relevant document
-    relevant_doc = data.iloc[indices[0][0]]
-
-    # Use the QA model to generate the answer
-    context = relevant_doc['Abstract']
-    response = qa_model(question=question, context=context)
-
-    return response['answer']
+    # Function to retrieve the relevant document and generate a response
+    @spaces.GPU(duration=120)
+    def retrieve_and_generate(question):
+        # Embed the question
+        question_embedding = embed_question(question, model, tokenizer)
+
+        # Search in FAISS index
+        _, indices = gpu_index.search(question_embedding, k=1)
+
+        # Retrieve the most relevant document
+        relevant_doc = data.iloc[indices[0][0]]
+
+        # Use the QA model to generate the answer
+        context = relevant_doc['Abstract']
+        response = qa_model(question=question, context=context)
+
+        return response['answer']
 
-# Create a Gradio interface
-interface = gr.Interface(
-    fn=retrieve_and_generate,
-    inputs=gr.Textbox(lines=2, placeholder="Ask a question about the documents..."),
-    outputs="text",
-    title="RAG Chatbot",
-    description="Ask questions about the documents in the CSV file."
-)
+    # Create a Gradio interface
+    interface = gr.Interface(
+        fn=retrieve_and_generate,
+        inputs=gr.Textbox(lines=2, placeholder="Ask a question about the documents..."),
+        outputs="text",
+        title="RAG Chatbot",
+        description="Ask questions about the documents in the CSV file."
+    )
 
-# Launch the Gradio app
-interface.launch()
+    # Launch the Gradio app
+    interface.launch()
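
Note: the hunk starts at line 16, inside safe_json_loads, so the imports and data loading at the top of app.py are not shown. A minimal sketch of what those first lines presumably contain, reconstructed only from names the hunk itself uses; the CSV path is a placeholder, not taken from the commit:

import json

import faiss
import gradio as gr
import numpy as np
import pandas as pd
import spaces  # Hugging Face Spaces helper that provides the @spaces.GPU decorator
import torch
from transformers import BertModel, BertTokenizer, pipeline

data = pd.read_csv("documents.csv")  # hypothetical filename; not visible in this hunk

def safe_json_loads(x):
    # Parse a JSON-encoded embedding string into a NumPy array.
    try:
        return np.array(json.loads(x))
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON: {e}")
        return np.zeros(128)  # the fallback this commit introduces

One caveat on that fallback, offered as an observation rather than part of the commit: np.zeros(128) hard-codes a 128-dimensional default, while bert-base-uncased, used below to embed questions, produces 768-dimensional vectors. A zero vector of the wrong width still passes the new "x is not None and len(x) > 0" filter, so if the stored embeddings are 768-wide, np.stack would fail on mismatched shapes. Since the new filter already checks for None, returning None on a decode error would let bad rows be dropped instead:

def safe_json_loads(x):
    try:
        return np.array(json.loads(x))
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON: {e}")
        return None  # dropped by the "x is not None and len(x) > 0" filter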