gaonkarrs committed
Commit 5f39e9e · 1 Parent(s): b98ae87

All datasets

app.py CHANGED
@@ -1,36 +1,58 @@
-# -*- coding: utf-8 -*-
-"""Deploy_CapstoneRagBench.ipynb
-
-Automatically generated by Colab.
-
-Original file is located at
-    https://colab.research.google.com/drive/1OG-77VqKwz3509_osgNgSeOMJ9G6RvB4
-"""
-
-# For Legal
-
 from datasets import load_from_disk
 from transformers import AutoTokenizer, AutoModel
 import faiss
 import numpy as np
 import torch
-from datasets import load_dataset, Dataset, get_dataset_config_names
+from datasets import load_from_disk
+import faiss
+import numpy as np
 import os
+from datasets import load_dataset, Dataset, get_dataset_config_names
+from sentence_transformers import SentenceTransformer
 from groq import Groq
 from sentence_transformers import CrossEncoder
 import requests
 import uuid
 import re
-import gradio as gr
 import json
-import torch
-import numpy as np
-from sklearn.metrics import mean_squared_error, roc_auc_score
 import gradio as gr
 import io
 import sys
 import traceback
 
+embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
+# Preload datasets and indices
+hf_dataset_cs = load_from_disk("cs_dataset")
+faiss_index_cs = faiss.read_index("cs_index/faiss.index")
+
+hf_dataset_med = load_from_disk("med_dataset")
+faiss_index_med = faiss.read_index("med_index/faiss.index")
+
+hf_dataset_gk = load_from_disk("gk_dataset")
+faiss_index_gk = faiss.read_index("gk_index/faiss.index")
+
+hf_dataset_fin = load_from_disk("fin_dataset")
+faiss_index_fin = faiss.read_index("fin_index/faiss.index")
+
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+print(device)
+
+
+legal_dataset = load_dataset("rungalileo/ragbench", "cuad", split="test")
+med_dataset = load_dataset("rungalileo/ragbench", "pubmedqa", split="test")
+gk_dataset = load_dataset("rungalileo/ragbench", "hotpotqa", split="test")
+cs_dataset = load_dataset("rungalileo/ragbench", "emanual", split="test")
+fin_dataset = load_dataset("rungalileo/ragbench", "finqa", split="test")
+
+# Load BGE reranker
+reranker = CrossEncoder("BAAI/bge-reranker-base", max_length=512)
+
+embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
+model_name = "nlpaueb/legal-bert-base-uncased"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModel.from_pretrained(model_name).to(device)
+model.eval()
+
 
 def retrieve_top_k(query,domain='legal', model_name='nlpaueb/legal-bert-base-uncased', k=8):
     # Load tokenizer and model
@@ -47,8 +69,8 @@ def retrieve_top_k(query,domain='legal', model_name='nlpaueb/legal-bert-base-uncased', k=8):
     query_embedding = outputs.last_hidden_state.mean(dim=1).cpu().numpy()
 
     # Load FAISS index and dataset
-    index_path = f"{domain}_index/faiss.index"
-    dataset_path = f"{domain}_dataset"
+    index_path = f"legal_index/faiss.index"
+    dataset_path = f"legal_dataset"
 
     faiss_index = faiss.read_index(index_path)
     dataset = load_from_disk(dataset_path)
@@ -60,17 +82,37 @@ def retrieve_top_k(query,domain='legal', model_name='nlpaueb/legal-bert-base-uncased', k=8):
     top_chunks = [dataset[int(idx)]['text'] for idx in I[0]]
     return top_chunks
 
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-#print(device)
 
-dataset = load_dataset("rungalileo/ragbench", "cuad", split="test")
+
+# Retrieval function using preloaded objects
+def retrieve_top_c(query, domain, embedder, k=5):
+    if domain == "CS":
+        hf_dataset = hf_dataset_cs
+        faiss_index = faiss_index_cs
+    elif domain == "Medical":
+        hf_dataset = hf_dataset_med
+        faiss_index = faiss_index_med
+    elif domain == "GK":
+        hf_dataset = hf_dataset_gk
+        faiss_index = faiss_index_gk
+    elif domain == "Finance":
+        hf_dataset = hf_dataset_fin
+        faiss_index = faiss_index_fin
+    else:
+        raise ValueError(f"Unknown domain: {domain}")
+
+    # Encode query and search
+    query_embedding = embedder.encode([query]).astype('float32')
+    #query_embedding = embedder.encode([query], convert_to_numpy=True).astype('float32')
+    distances, indices = faiss_index.search(query_embedding, k)
+
+    return [hf_dataset[int(i)]["text"] for i in indices[0]]
+
 
 client = Groq(
     api_key= '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',#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',
 )
 
-# Load BGE reranker
-reranker = CrossEncoder("BAAI/bge-reranker-base", max_length=512)
 
 def rerank_documents_bge(query, documents, top_n=5, return_scores=False):
     """
@@ -103,13 +145,18 @@ def rerank_documents_bge(query, documents, top_n=5, return_scores=False):
         return [doc for doc, _ in reranked[:top_n]]
 
 
-def generate_response_rag(query,model,index_dir="legal_index"):
+
+def generate_response_rag(query,domain):
     # Step 1: Retrieve top-k context chunks using your FAISS setup
-    top_chunks = retrieve_top_k(query,'legal', "nlpaueb/legal-bert-base-uncased")
+    if domain == "Legal":
+        top_chunks = retrieve_top_k(query,'Legal', model_name)
+    else:
+        top_chunks = retrieve_top_c(query, domain,embedder)
 
     # Step 2: Rerank retrieved documents using cross-encoder
     #reranked_chunks = rerank_documents(query, top_chunks, top_n=15)
     #rerank_and_filter_chunks = filter_by_faithfulness(query, reranked_chunks)
+    #print("Retrieved Top chunks",top_chunks)
 
     #reranked_chunks = rerank_and_filter_chunks
     reranked_chunks_bge = rerank_documents_bge(query, top_chunks, top_n=5)
@@ -136,11 +183,11 @@ def generate_response_rag(query,model,index_dir="legal_index"):
         messages=[
             {"role": "user", "content": prompt}
         ],
-        model=model,#"gemma2-9b-it"#"qwen/qwen3-32b"#deepseek-r1-distill-llama-70b",#"llama3-70b-8192", # mistral-saba-24b
+        model="llama3-70b-8192",#"gemma2-9b-it"#"qwen/qwen3-32b"#deepseek-r1-distill-llama-70b",#"llama3-70b-8192", # mistral-saba-24b
        temperature=0.0
     )
 
-    return chat_completion.choices[0].message.content.strip()
+    return context,chat_completion.choices[0].message.content.strip()
 
     '''response = openai.chat.completions.create(
     model="gpt-3.5-turbo",
@@ -155,6 +202,7 @@ def generate_response_rag(query,model,index_dir="legal_index"):
 
 #JUDGE LLM
 
+
 def split_into_keyed_sentences(text, prefix):
     """Splits text into sentences with keys like '0a.', '0b.', or 'a.', 'b.', etc."""
     # Basic sentence tokenizer with keys
@@ -167,20 +215,23 @@ def split_into_keyed_sentences(text, prefix):
     return keyed
 
 
-def jugde_response_rag(query, embedder="nlpaueb/legal-bert-base-uncased", domain="legal", k=5):
+def jugde_response_rag(query, domain):
 
-    top_chunks = retrieve_top_k(query)
+    #top_chunks = retrieve_top_k(query)
 
-    top_chunks = [chunk[0] if isinstance(chunk, tuple) else chunk for chunk in top_chunks]
+    #top_chunks = [chunk[0] if isinstance(chunk, tuple) else chunk for chunk in top_chunks]
 
     # Step 2: Prepare context and RAG-style prompt
-    context = "\n\n".join(top_chunks)
+    #context = "\n\n".join(top_chunks)
 
     # Split context and dummy answer into keyed sentences
-    document_keys = split_into_keyed_sentences(context, "0")
+    #document_keys = split_into_keyed_sentences(context, "0")
 
     #print(f"Query:{query}\n====================================================================")
-    response = generate_response_rag(query,model="llama3-70b-8192") #deepseek-r1-distill-llama-70b llama3-70b-8192
+    context,response = generate_response_rag(query,domain) #deepseek-r1-distill-llama-70b llama3-70b-8192
+
+    # Split context and dummy answer into keyed sentences
+    document_keys = split_into_keyed_sentences(context, "0")
     #print(f"\n====================================\Generator Response:{response}")
     #For deepseek
     #print("Before Curated:",response)
@@ -327,6 +378,7 @@ is supported by the text in the documents.\
     )
     return documents_formatted,chat_completion.choices[0].message.content'''
 
+
 def extract_retrieved_sentence_keys(document_text: str) -> list[str]:
     """
     Extracts sentence keys like '0a.', '0b.', etc. from a formatted document string.
@@ -386,11 +438,11 @@ def compute_ragbench_metrics(judge_response: dict, retrieved_sentence_keys: list[str]) -> dict:
     }
 
 
-def compute_rmse(gt, pred):
-    return round(np.sqrt(np.mean((np.array(gt) - np.array(pred)) ** 2)), 4)
-
+def evaluate_rag_pipeline(domain, q_indices):
+    import torch
+    import numpy as np
+    from sklearn.metrics import mean_squared_error, roc_auc_score
 
-def evaluate_rag_pipeline(q_indices):
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
     def safe_append(gt_list, pred_list, gt_val, pred_val):
@@ -421,12 +473,24 @@ def evaluate_rag_pipeline(q_indices):
     gt_completeness, pred_completeness = [], []
     gt_adherence, pred_adherence = [], []
 
+    if(domain=="Legal"):
+        dataset = legal_dataset
+    elif(domain=="Medical"):
+        dataset = med_dataset
+    elif(domain=="GK"):
+        dataset = gk_dataset
+    elif(domain=="CS"):
+        dataset = cs_dataset
+    elif(domain=="Finance"):
+        dataset = fin_dataset
+
     for i in q_indices:
         query = dataset[i]['question']
         print(f"\n\n\nQuery:{i}.{query}\n====================================================================")
-        documents_formatted, response = jugde_response_rag(
-            query, embedder="nlpaueb/legal-bert-base-uncased", domain="legal")
+        #print(f"\ndomain:{domain}====================================================================")
+        documents_formatted, response = jugde_response_rag(query, domain)
         judge_response = clean_and_parse_json_block(response)
+        print(f"\ndocuments_formatted:{documents_formatted}")
         print(f"\n======================================================================\nResponse:{judge_response}")
         retrieved_sentences = extract_retrieved_sentence_keys(documents_formatted)
         predicted = compute_ragbench_metrics(judge_response, retrieved_sentences)
@@ -462,17 +526,18 @@ def evaluate_rag_pipeline(q_indices):
     return result
 
 
-# Wrapper to parse textbox input into list of ints
-def evaluate_rag_gradio(q_indices_str):
-    # Capture printed logs
+
+# Updated wrapper
+def evaluate_rag_gradio(domain, q_indices_str):
+    # Capture logs
     log_stream = io.StringIO()
     sys.stdout = log_stream
 
     try:
+        # Parse comma-separated indices
        q_indices = [int(x.strip()) for x in q_indices_str.split(",") if x.strip().isdigit()]
-        results = evaluate_rag_pipeline(q_indices)
+        results = evaluate_rag_pipeline(domain, q_indices)
 
-        # Return metrics and logs
        logs = log_stream.getvalue()
        return results, logs
 
@@ -481,17 +546,22 @@ def evaluate_rag_gradio(q_indices_str):
        return {"error": str(e)}, log_stream.getvalue()
 
    finally:
-        sys.stdout = sys.__stdout__
+        sys.stdout = sys.__stdout__  # Restore stdout
 
+# Gradio interface
 iface = gr.Interface(
    fn=evaluate_rag_gradio,
-    inputs=gr.Textbox(label="Comma-separated Query Indices (e.g. 89,121,245)", lines=1),
+    inputs=[
+        gr.Dropdown(choices=["Legal", "Medical", "GK", "CS", "Finance"], label="Domain"),
+        gr.Textbox(label="Comma-separated Query Indices (e.g. 89,121,245)", lines=1),
+    ],
    outputs=[
        gr.JSON(label="Evaluation Metrics (RMSE & AUC-ROC)"),
-        gr.Textbox(label="Execution Log", lines=5, interactive=True)
+        gr.Textbox(label="Execution Log", lines=10, interactive=True),
    ],
    title="RAG Evaluation Dashboard",
    description="Evaluate your RAG pipeline across selected queries using GPT-based generation and judgment."
 )
 
-iface.launch(debug=True)
+# Launch app
+iface.launch(server_name="0.0.0.0", server_port=7860, debug=True)
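Note: the commit does not include the script that built the per-domain artifacts the new code preloads (e.g. "cs_dataset" and "cs_index/faiss.index", added below as LFS files). A minimal sketch of one plausible build step, assuming the corpus chunks were embedded with the same all-MiniLM-L6-v2 model that retrieve_top_c() uses at query time and stored in a flat L2 FAISS index:

# Hypothetical build step -- the embedding model matches app.py, but the
# placeholder chunks and the IndexFlatL2 choice are assumptions.
import os
import faiss
from datasets import Dataset
from sentence_transformers import SentenceTransformer

embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

chunks = ["example passage one", "example passage two"]  # placeholder corpus chunks
embeddings = embedder.encode(chunks, convert_to_numpy=True).astype("float32")

index = faiss.IndexFlatL2(embeddings.shape[1])  # assumed index type
index.add(embeddings)

# The "text" column matches the feature declared in the *_dataset/dataset_info.json files below.
Dataset.from_dict({"text": chunks}).save_to_disk("cs_dataset")
os.makedirs("cs_index", exist_ok=True)
faiss.write_index(index, "cs_index/faiss.index")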
bkp_app.py ADDED
@@ -0,0 +1,497 @@
+# -*- coding: utf-8 -*-
+"""Deploy_CapstoneRagBench.ipynb
+
+Automatically generated by Colab.
+
+Original file is located at
+    https://colab.research.google.com/drive/1OG-77VqKwz3509_osgNgSeOMJ9G6RvB4
+"""
+
+# For Legal
+
+from datasets import load_from_disk
+from transformers import AutoTokenizer, AutoModel
+import faiss
+import numpy as np
+import torch
+from datasets import load_dataset, Dataset, get_dataset_config_names
+import os
+from groq import Groq
+from sentence_transformers import CrossEncoder
+import requests
+import uuid
+import re
+import gradio as gr
+import json
+import torch
+import numpy as np
+from sklearn.metrics import mean_squared_error, roc_auc_score
+import gradio as gr
+import io
+import sys
+import traceback
+
+
+def retrieve_top_k(query,domain='legal', model_name='nlpaueb/legal-bert-base-uncased', k=8):
+    # Load tokenizer and model
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    model = AutoModel.from_pretrained(model_name).to(device)
+    model.eval()
+
+    #print(f"In retrive_top_k Query:{query}")
+    # Tokenize and embed query using mean pooling
+    inputs = tokenizer(query, return_tensors="pt", padding=True, truncation=True, max_length=512)
+    inputs = {k: v.to(device) for k, v in inputs.items()}
+    with torch.no_grad():
+        outputs = model(**inputs)
+    query_embedding = outputs.last_hidden_state.mean(dim=1).cpu().numpy()
+
+    # Load FAISS index and dataset
+    index_path = f"{domain}_index/faiss.index"
+    dataset_path = f"{domain}_dataset"
+
+    faiss_index = faiss.read_index(index_path)
+    dataset = load_from_disk(dataset_path)
+
+    # Perform FAISS search
+    D, I = faiss_index.search(query_embedding.astype('float32'), k)
+
+    # Retrieve top-k matching chunks
+    top_chunks = [dataset[int(idx)]['text'] for idx in I[0]]
+    return top_chunks
+
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+#print(device)
+
+dataset = load_dataset("rungalileo/ragbench", "cuad", split="test")
+
+client = Groq(
+    api_key= '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',#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',
+)
+
+# Load BGE reranker
+reranker = CrossEncoder("BAAI/bge-reranker-base", max_length=512)
+
+def rerank_documents_bge(query, documents, top_n=5, return_scores=False):
+    """
+    Rerank documents using BAAI/bge-reranker-base CrossEncoder.
+
+    Args:
+        query (str): The query string.
+        documents (List[str]): List of candidate documents.
+        top_n (int): Number of top results to return.
+        return_scores (bool): Whether to return scores along with documents.
+
+    Returns:
+        List[str] or List[Tuple[str, float]]
+    """
+    if not documents:
+        return []
+
+    # Prepare (query, doc) pairs
+    pairs = [(query, doc) for doc in documents]
+
+    # Predict relevance scores
+    scores = reranker.predict(pairs, batch_size=16)
+
+    # Sort by score descending
+    reranked = sorted(zip(documents, scores), key=lambda x: x[1], reverse=True)
+
+    if return_scores:
+        return reranked[:top_n]
+    else:
+        return [doc for doc, _ in reranked[:top_n]]
+
+
+def generate_response_rag(query,model,index_dir="legal_index"):
+    # Step 1: Retrieve top-k context chunks using your FAISS setup
+    top_chunks = retrieve_top_k(query,'legal', "nlpaueb/legal-bert-base-uncased")
+
+    # Step 2: Rerank retrieved documents using cross-encoder
+    #reranked_chunks = rerank_documents(query, top_chunks, top_n=15)
+    #rerank_and_filter_chunks = filter_by_faithfulness(query, reranked_chunks)
+
+    #reranked_chunks = rerank_and_filter_chunks
+    reranked_chunks_bge = rerank_documents_bge(query, top_chunks, top_n=5)
+    #sum_context = summarize_context("\n\n".join(reranked_chunks_bge))
+
+
+
+    final_context = reranked_chunks_bge
+    # Step 2: Prepare context and RAG-style prompt
+    context = "\n\n".join(final_context)
+
+    #print(f"Context:{context}")
+    prompt = f"""You are a helpful legal assistant.
+Use the following context to answer the question.
+Using only the information from the retrieved context, answer the following question. If the answer cannot be derived, say "I don't know." Always have answer with prefix **Answer:**
+
+Context:{context}
+
+Question: {query}
+Answer:"""
+
+    # Step 3: Call the LLM (LLaMA3 or any chat model)
+    chat_completion = client.chat.completions.create(
+        messages=[
+            {"role": "user", "content": prompt}
+        ],
+        model=model,#"gemma2-9b-it"#"qwen/qwen3-32b"#deepseek-r1-distill-llama-70b",#"llama3-70b-8192", # mistral-saba-24b
+        temperature=0.0
+    )
+
+    return chat_completion.choices[0].message.content.strip()
+
+    '''response = openai.chat.completions.create(
+    model="gpt-3.5-turbo",
+    messages=[
+        {"role": "user", "content": prompt}
+    ],
+    temperature=0.0,
+    max_tokens=1024
+    )
+
+    return response.choices[0].message.content'''
+
+#JUDGE LLM
+
+def split_into_keyed_sentences(text, prefix):
+    """Splits text into sentences with keys like '0a.', '0b.', or 'a.', 'b.', etc."""
+    # Basic sentence tokenizer with keys
+    sentences = re.split(r'(?<=[.?!])\s+', text.strip())
+    keyed = {}
+    for i, s in enumerate(sentences):
+        key = f"{prefix}{chr(97 + i)}"  # 'a', 'b', ...
+        if s:
+            keyed[key] = s.strip()
+    return keyed
+
+
+def jugde_response_rag(query, embedder="nlpaueb/legal-bert-base-uncased", domain="legal", k=5):
+
+    top_chunks = retrieve_top_k(query)
+
+    top_chunks = [chunk[0] if isinstance(chunk, tuple) else chunk for chunk in top_chunks]
+
+    # Step 2: Prepare context and RAG-style prompt
+    context = "\n\n".join(top_chunks)
+
+    # Split context and dummy answer into keyed sentences
+    document_keys = split_into_keyed_sentences(context, "0")
+
+    #print(f"Query:{query}\n====================================================================")
+    response = generate_response_rag(query,model="llama3-70b-8192") #deepseek-r1-distill-llama-70b llama3-70b-8192
+    #print(f"\n====================================\Generator Response:{response}")
+    #For deepseek
+    #print("Before Curated:",response)
+    response=response[response.find("**Answer"):].replace("**Answer","");
+
+    print(f"Response for Generator LLM:{response}")
+
+    response_keys = split_into_keyed_sentences(response, "")
+    # Rebuild sections for prompt
+    documents_formatted = "\n".join([f"{k}. {v}" for k, v in document_keys.items()])
+    response_formatted = "\n".join([f"{k}. {v}" for k, v in response_keys.items()])
+
+    '''print(f"\n====================================================================")
+    print(f"documents_formatted:{documents_formatted}")
+    print(f"\n====================================================================")
+    print(f"response_formatted:{response_formatted}")
+    print(f"\n====================================================================")'''
+
+
+    prompt = f"""I asked someone to answer a question based on one or more documents.
+Your task is to review their response and assess whether or not each sentence
+in that response is supported by text in the documents. And if so, which
+sentences in the documents provide that support. You will also tell me which
+of the documents contain useful information for answering the question, and
+which of the documents the answer was sourced from.
+Here are the documents, each of which is split into sentences. Alongside each
+sentence is associated key, such as ’0a.’ or ’0b.’ that you can use to refer
+to it:
+'''
+{documents_formatted}
+'''
+The question was:
+'''
+{query}
+'''
+Here is their response, split into sentences. Alongside each sentence is
+associated key, such as ’a.’ or ’b.’ that you can use to refer to it. Note
+that these keys are unique to the response, and are not related to the keys
+in the documents:
+'''
+{response_formatted}
+'''
+You must respond with a JSON object matching this schema:
+'''
+{{
+  "relevance_explanation": string,
+  "all_relevant_sentence_keys": [string],
+  "overall_supported_explanation": string,
+  "overall_supported": boolean,
+  "sentence_support_information": [
+    {{
+      "response_sentence_key": string,
+      "explanation": string,
+      "supporting_sentence_keys": [string],
+      "fully_supported": boolean
+    }},
+  ],
+  "all_utilized_sentence_keys": [string]
+}}
+'''
+The relevance_explanation field is a string explaining which documents
+contain useful information for answering the question. Provide a step-by-step
+breakdown of information provided in the documents and how it is useful for
+answering the question.
+The all_relevant_sentence_keys field is a list of all document sentences keys
+(e.g. ’0a’) that are revant to the question. Include every sentence that is
+useful and relevant to the question, even if it was not used in the response,
+or if only parts of the sentence are useful. Ignore the provided response when
+making this judgement and base your judgement solely on the provided documents
+and question. Omit sentences that, if removed from the document, would not
+impact someone’s ability to answer the question.
+The overall_supported_explanation field is a string explaining why the response
+*as a whole* is or is not supported by the documents. In this field, provide a
+step-by-step breakdown of the claims made in the response and the support (or
+lack thereof) for those claims in the documents. Begin by assessing each claim
+separately, one by one; don’t make any remarks about the response as a whole
+until you have assessed all the claims in isolation.
+The overall_supported field is a boolean indicating whether the response as a
+whole is supported by the documents. This value should reflect the conclusion
+you drew at the end of your step-by-step breakdown in overall_supported_explanation.
+In the sentence_support_information field, provide information about the support
+*for each sentence* in the response.
+The sentence_support_information field is a list of objects, one for each sentence
+in the response. Each object MUST have the following fields:
+- response_sentence_key: a string identifying the sentence in the response.
+This key is the same as the one used in the response above.
+- explanation: a string explaining why the sentence is or is not supported by the
+documents.
+- supporting_sentence_keys: keys (e.g. ’0a’) of sentences from the documents that
+support the response sentence. If the sentence is not supported, this list MUST
+be empty. If the sentence is supported, this list MUST contain one or more keys.
+In special cases where the sentence is supported, but not by any specific sentence,
+you can use the string "supported_without_sentence" to indicate that the sentence
+is generally supported by the documents. Consider cases where the sentence is
+expressing inability to answer the question due to lack of relevant information in
+the provided contex as "supported_without_sentence". In cases where the sentence
+is making a general statement (e.g. outlining the steps to produce an answer, or
+summarizing previously stated sentences, or a transition sentence), use the
+sting "general".In cases where the sentence is correctly stating a well-known fact,
+like a mathematical formula, use the string "well_known_fact". In cases where the
+sentence is performing numerical reasoning (e.g. addition, multiplication), use
+the string "numerical_reasoning".
+- fully_supported: a boolean indicating whether the sentence is fully supported by
+the documents.
+- This value should reflect the conclusion you drew at the end of your step-by-step
+breakdown in explanation.
+- If supporting_sentence_keys is an empty list, then fully_supported must be false.
+17
+- Otherwise, use fully_supported to clarify whether everything in the response
+sentence is fully supported by the document text indicated in supporting_sentence_keys
+(fully_supported = true), or whether the sentence is only partially or incompletely
+supported by that document text (fully_supported = false).
+The all_utilized_sentence_keys field is a list of all sentences keys (e.g. ’0a’) that
+were used to construct the answer. Include every sentence that either directly supported
+the answer, or was implicitly used to construct the answer, even if it was not used
+in its entirety. Omit sentences that were not used, and could have been removed from
+the documents without affecting the answer.
+You must respond with a valid JSON string. Use escapes for quotes, e.g. ‘\\"‘, and
+newlines, e.g. ‘\\n‘. Do not write anything before or after the JSON string. Do not
+wrap the JSON string in backticks like ‘‘‘ or ‘‘‘json.
+As a reminder: your task is to review the response and assess which documents contain
+useful information pertaining to the question, and how each sentence in the response
+is supported by the text in the documents.\
+"""
+
+    # Step 3: Call the LLM
+    chat_completion = client.chat.completions.create(
+        messages=[
+            {"role": "user", "content": prompt}
+        ],
+        model="meta-llama/llama-4-maverick-17b-128e-instruct", #deepseek-r1-distill-llama-70b llama3-70b-8192 meta-llama/llama-4-maverick-17b-128e-instruct
+    )
+
+    return documents_formatted,chat_completion.choices[0].message.content.strip()
+
+    '''chat_completion = openai.chat.completions.create(
+    messages=[
+        {"role":"user",
+        "content":prompt}
+    ],
+    model="gpt-4o",
+    max_tokens=1024,
+
+    )
+    return documents_formatted,chat_completion.choices[0].message.content'''
+
+def extract_retrieved_sentence_keys(document_text: str) -> list[str]:
+    """
+    Extracts sentence keys like '0a.', '0b.', etc. from a formatted document string.
+
+    Parameters:
+    - document_text (str): full text of document with sentence keys
+
+    Returns:
+    - List of unique sentence keys in the order they appear
+    """
+    # Match pattern like 0a., 0b., 0z., 0{., 0|., etc.
+    pattern = r'\b0[\w\{\|\}~€‚]\.'
+
+    matches = re.findall(pattern, document_text)
+    return list(dict.fromkeys(matches))  # Removes duplicates while preserving order
+
+def compute_ragbench_metrics(judge_response: dict, retrieved_sentence_keys: list[str]) -> dict:
+    """
+    Computes RAGBench-style metrics from Judge LLM response.
+
+    Parameters:
+    - judge_response (dict): JSON response from Judge LLM
+    - retrieved_sentence_keys (list of str): all sentence keys from the retrieved documents
+
+    Returns:
+    - Dictionary with Context Relevance, Context Utilization, Completeness, and Adherence
+    """
+
+    R = set(judge_response.get("all_relevant_sentence_keys", []))  # Relevant sentences
+    U = set(judge_response.get("all_utilized_sentence_keys", []))  # Utilized sentences
+    intersection_RU = R & U
+
+    total_retrieved = len(retrieved_sentence_keys)
+    len_R = len(R)
+    len_U = len(U)
+    len_intersection = len(intersection_RU)
+
+    # Context Relevance: fraction of retrieved context that is relevant
+    context_relevance = len_R / total_retrieved if total_retrieved else 0.0
+
+    # Context Utilization: fraction of retrieved context that was used
+    context_utilization = len_U / total_retrieved if total_retrieved else 0.0
+
+    # Completeness: fraction of relevant content that was used
+    completeness = len_intersection / len_R if len_R else 0.0
+
+    # Adherence: 1 if all response sentences are fully supported, else 0
+    is_fully_supported = all(s.get("fully_supported", False)
+                             for s in judge_response.get("sentence_support_information", []))
+    adherence = 1.0 if is_fully_supported and judge_response.get("overall_supported", False) else 0.0
+
+    return {
+        "Context Relevance": round(context_relevance, 4),
+        "Context Utilization": round(context_utilization, 4),
+        "Completeness": round(completeness, 4),
+        "Adherence": adherence
+    }
+
+
+def compute_rmse(gt, pred):
+    return round(np.sqrt(np.mean((np.array(gt) - np.array(pred)) ** 2)), 4)
+
+
+def evaluate_rag_pipeline(q_indices):
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+    def safe_append(gt_list, pred_list, gt_val, pred_val):
+        if gt_val is not None and pred_val is not None:
+            gt_list.append(gt_val)
+            pred_list.append(pred_val)
+
+    def clean_and_parse_json_block(text):
+        # Strip markdown-style code block if present
+        #text = text.strip().strip("`").strip()
+        code_block_match = re.search(r"```(?:json)?\s*([\s\S]*?)\s*```", text)
+        if code_block_match:
+            text = code_block_match.group(1).strip()
+
+        # Remove invalid/control characters that break decoding
+        text = re.sub(r"[^\x20-\x7E\n\t]", "", text)
+
+        try:
+            return json.loads(text)
+        except json.JSONDecodeError as e:
+            print("❌ JSON Decode Error:", e)
+            print("⚠️ Cleaned text:\n", text)
+            raise
+
+
+    gt_relevance, pred_relevance = [], []
+    gt_utilization, pred_utilization = [], []
+    gt_completeness, pred_completeness = [], []
+    gt_adherence, pred_adherence = [], []
+
+    for i in q_indices:
+        query = dataset[i]['question']
+        print(f"\n\n\nQuery:{i}.{query}\n====================================================================")
+        documents_formatted, response = jugde_response_rag(
+            query, embedder="nlpaueb/legal-bert-base-uncased", domain="legal")
+        judge_response = clean_and_parse_json_block(response)
+        print(f"\n======================================================================\nResponse:{judge_response}")
+        retrieved_sentences = extract_retrieved_sentence_keys(documents_formatted)
+        predicted = compute_ragbench_metrics(judge_response, retrieved_sentences)
+
+        # GT values
+        gt_r = dataset[i].get('relevance_score')
+        gt_u = dataset[i].get('utilization_score')
+        gt_c = dataset[i].get('completeness_score')
+        gt_a = dataset[i].get('gpt3_adherence')
+
+        safe_append(gt_relevance, pred_relevance, gt_r, predicted['Context Relevance'])
+        safe_append(gt_utilization, pred_utilization, gt_u, predicted['Context Utilization'])
+        safe_append(gt_completeness, pred_completeness, gt_c, predicted['Completeness'])
+        if gt_a is not None and predicted['Adherence'] is not None:
+            safe_append(gt_adherence, pred_adherence, int(gt_a), int(predicted['Adherence']))
+
+    def compute_rmse(gt, pred):
+        return round(np.sqrt(np.mean((np.array(gt) - np.array(pred)) ** 2)), 4)
+
+    result = {
+        "Context Relevance": compute_rmse(gt_relevance, pred_relevance),
+        "Context Utilization": compute_rmse(gt_utilization, pred_utilization),
+        "Completeness": compute_rmse(gt_completeness, pred_completeness),
+    }
+
+    if len(set(gt_adherence)) == 2:
+        result["Adherence"] = compute_rmse(gt_adherence, pred_adherence)
+        result["AUC-ROC (Adherence)"] = round(roc_auc_score(gt_adherence, pred_adherence), 4)
+    else:
+        result["Adherence"] = compute_rmse(gt_adherence, pred_adherence)
+        result["AUC-ROC (Adherence)"] = "N/A - one class only"
+
+    return result
+
+
+# Wrapper to parse textbox input into list of ints
+def evaluate_rag_gradio(q_indices_str):
+    # Capture printed logs
+    log_stream = io.StringIO()
+    sys.stdout = log_stream
+
+    try:
+        q_indices = [int(x.strip()) for x in q_indices_str.split(",") if x.strip().isdigit()]
+        results = evaluate_rag_pipeline(q_indices)
+
+        # Return metrics and logs
+        logs = log_stream.getvalue()
+        return results, logs
+
+    except Exception as e:
+        traceback.print_exc()
+        return {"error": str(e)}, log_stream.getvalue()
+
+    finally:
+        sys.stdout = sys.__stdout__
+
+iface = gr.Interface(
+    fn=evaluate_rag_gradio,
+    inputs=gr.Textbox(label="Comma-separated Query Indices (e.g. 89,121,245)", lines=1),
+    outputs=[
+        gr.JSON(label="Evaluation Metrics (RMSE & AUC-ROC)"),
+        gr.Textbox(label="Execution Log", lines=5, interactive=True)
+    ],
+    title="RAG Evaluation Dashboard",
+    description="Evaluate your RAG pipeline across selected queries using GPT-based generation and judgment."
+)
+
+iface.launch(debug=True)
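For reference, a toy walk-through (values invented) of how compute_ragbench_metrics(), which is defined identically in app.py and bkp_app.py, turns a judge response into the four RAGBench-style scores; it assumes that function is already in scope:

# Hypothetical judge output: 4 retrieved sentences, 2 judged relevant, 1 actually used.
judge_response = {
    "all_relevant_sentence_keys": ["0a", "0b"],
    "all_utilized_sentence_keys": ["0a"],
    "overall_supported": True,
    "sentence_support_information": [
        {"response_sentence_key": "a", "supporting_sentence_keys": ["0a"], "fully_supported": True}
    ],
}
retrieved_keys = ["0a.", "0b.", "0c.", "0d."]  # as returned by extract_retrieved_sentence_keys()

# Context Relevance   = |R| / retrieved = 2/4 = 0.5
# Context Utilization = |U| / retrieved = 1/4 = 0.25
# Completeness        = |R ∩ U| / |R|   = 1/2 = 0.5
# Adherence           = 1.0 (every response sentence fully supported and overall_supported is True)
print(compute_ragbench_metrics(judge_response, retrieved_keys))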
cs_dataset/data-00000-of-00001.arrow ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6c81fcd283298c766efceed51005f94977eb042565a6d6e32a141af3516eddab
+size 88920
cs_dataset/dataset_info.json ADDED
@@ -0,0 +1,12 @@
+{
+  "citation": "",
+  "description": "",
+  "features": {
+    "text": {
+      "dtype": "string",
+      "_type": "Value"
+    }
+  },
+  "homepage": "",
+  "license": ""
+}
cs_dataset/state.json ADDED
@@ -0,0 +1,13 @@
+{
+  "_data_files": [
+    {
+      "filename": "data-00000-of-00001.arrow"
+    }
+  ],
+  "_fingerprint": "ec44a3721c635a27",
+  "_format_columns": null,
+  "_format_kwargs": {},
+  "_format_type": null,
+  "_output_all_columns": false,
+  "_split": null
+}
cs_index/faiss.index ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:781715fcdd5abaccf46e7df9e34cb8fe08cefa3f47fc4381c1530e83ad3d3cb6
+size 370221
fin_dataset/data-00000-of-00001.arrow ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7b6bb5e029518500e5764893d7267aa86d93e7a0e8ceae7969c371f17b42e3fc
+size 1504056
fin_dataset/dataset_info.json ADDED
@@ -0,0 +1,12 @@
+{
+  "citation": "",
+  "description": "",
+  "features": {
+    "text": {
+      "dtype": "string",
+      "_type": "Value"
+    }
+  },
+  "homepage": "",
+  "license": ""
+}
fin_dataset/state.json ADDED
@@ -0,0 +1,13 @@
+{
+  "_data_files": [
+    {
+      "filename": "data-00000-of-00001.arrow"
+    }
+  ],
+  "_fingerprint": "fe416e18cf3f19d0",
+  "_format_columns": null,
+  "_format_kwargs": {},
+  "_format_type": null,
+  "_output_all_columns": false,
+  "_split": null
+}
fin_index/faiss.index ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fdb1e231b83001723586362b682f80487689ad9bb208a1c8dea3bade5d004cbd
+size 6039597
gk_dataset/data-00000-of-00001.arrow ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a2ff3d603dd8f386d4f55dfb696d9a486e29e2c948c7e4cb03291b3f1185e671
+size 777424
gk_dataset/dataset_info.json ADDED
@@ -0,0 +1,12 @@
+{
+  "citation": "",
+  "description": "",
+  "features": {
+    "text": {
+      "dtype": "string",
+      "_type": "Value"
+    }
+  },
+  "homepage": "",
+  "license": ""
+}
gk_dataset/state.json ADDED
@@ -0,0 +1,13 @@
+{
+  "_data_files": [
+    {
+      "filename": "data-00000-of-00001.arrow"
+    }
+  ],
+  "_fingerprint": "00d8c8388a8ac73c",
+  "_format_columns": null,
+  "_format_kwargs": {},
+  "_format_type": null,
+  "_output_all_columns": false,
+  "_split": null
+}
gk_index/faiss.index ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0c22862009c798b27b235f0af137915a98fad631649735dbf19a467e3f896be6
+size 3526701
med_dataset/data-00000-of-00001.arrow ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eb7eaae975a43389ed3ccd143dfcfca1e61ad094e3064ec477f36f9cd47d11ad
+size 2245824
med_dataset/dataset_info.json ADDED
@@ -0,0 +1,12 @@
+{
+  "citation": "",
+  "description": "",
+  "features": {
+    "text": {
+      "dtype": "string",
+      "_type": "Value"
+    }
+  },
+  "homepage": "",
+  "license": ""
+}
med_dataset/state.json ADDED
@@ -0,0 +1,13 @@
+{
+  "_data_files": [
+    {
+      "filename": "data-00000-of-00001.arrow"
+    }
+  ],
+  "_fingerprint": "e0ae8ccbcca935ea",
+  "_format_columns": null,
+  "_format_kwargs": {},
+  "_format_type": null,
+  "_output_all_columns": false,
+  "_split": null
+}
med_index/faiss.index ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:678041534c1d3641eaaed36f5efca24094762a1454eb6bdd413c2973b94c5dff
+size 11473965
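A quick local sanity check (assuming the repository is cloned with git lfs pull so the Arrow files and FAISS indices above are real binaries rather than pointer files): each index is expected to hold one vector per text chunk if it was built from the matching dataset.

import faiss
from datasets import load_from_disk

for name in ["cs", "med", "gk", "fin"]:
    ds = load_from_disk(f"{name}_dataset")
    index = faiss.read_index(f"{name}_index/faiss.index")
    # Counts should agree if the index was built from this dataset's "text" column.
    print(name, len(ds), index.ntotal, len(ds) == index.ntotal)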