Commit 5de31b9
Parent: 5935473

Update app.py

Files changed (1):
  1. app.py +48 -6
app.py CHANGED
@@ -59,13 +59,21 @@ def evaluate_model_with_insights(model_name):
     for dataset_name, dataset in datasets.items():
         all_mrr, all_map, all_ndcg = [], [], []
         dataset_samples = []
-
         if 'candidate_document' in dataset.column_names:
             grouped_data = dataset.to_pandas().groupby("query")
             for query, group in grouped_data:
-                candidate_texts = group['candidate_document'].tolist()
+                # Skip invalid queries
+                if query is None or not isinstance(query, str) or query.strip() == "":
+                    continue
+
+                candidate_texts = group['candidate_document'].dropna().tolist()
                 relevance_labels = group['relevance_label'].tolist()
-                pairs = [(query, doc) for doc in candidate_texts]
+
+                # Skip if no valid candidate documents
+                if not candidate_texts or len(candidate_texts) != len(relevance_labels):
+                    continue
+
+                pairs = [(query, doc) for doc in candidate_texts if doc is not None and isinstance(doc, str) and doc.strip() != ""]
                 scores = model.predict(pairs)
 
                 # Collecting top-5 results for display
@@ -83,8 +91,21 @@ def evaluate_model_with_insights(model_name):
         else:
             for entry in dataset:
                 query = entry['query']
-                candidate_texts = [entry['positive'], entry['negative1'], entry['negative2'], entry['negative3'], entry['negative4']]
-                relevance_labels = [1, 0, 0, 0, 0]
+
+                # Validate query and documents
+                if query is None or not isinstance(query, str) or query.strip() == "":
+                    continue
+
+                candidate_texts = [
+                    doc for doc in [entry.get('positive'), entry.get('negative1'), entry.get('negative2'), entry.get('negative3'), entry.get('negative4')]
+                    if doc is not None and isinstance(doc, str) and doc.strip() != ""
+                ]
+                relevance_labels = [1] + [0] * (len(candidate_texts) - 1)
+
+                # Skip if no valid candidate documents
+                if not candidate_texts or len(candidate_texts) != len(relevance_labels):
+                    continue
+
                 pairs = [(query, doc) for doc in candidate_texts]
                 scores = model.predict(pairs)
 
@@ -100,6 +121,27 @@ def evaluate_model_with_insights(model_name):
                 all_mrr.append(mean_reciprocal_rank(relevance_labels, scores))
                 all_map.append(mean_average_precision(relevance_labels, scores))
                 all_ndcg.append(ndcg_at_k(relevance_labels, scores, k=10))
+
+        else:
+            for entry in dataset:
+                query = entry['query']
+                candidate_texts = [entry['positive'], entry['negative1'], entry['negative2'], entry['negative3'], entry['negative4']]
+                relevance_labels = [1, 0, 0, 0, 0]
+                pairs = [(query, doc) for doc in candidate_texts]
+                scores = model.predict(pairs)
+
+                # Collecting top-5 results for display
+                sorted_indices = np.argsort(scores)[::-1]
+                top_docs = [(candidate_texts[i], scores[i], relevance_labels[i]) for i in sorted_indices[:5]]
+                dataset_samples.append({
+                    "Query": query,
+                    "Top 5 Candidates": top_docs
+                })
+
+                # Metrics
+                all_mrr.append(mean_reciprocal_rank(relevance_labels, scores))
+                all_map.append(mean_average_precision(relevance_labels, scores))
+                all_ndcg.append(ndcg_at_k(relevance_labels, scores, k=10))
 
         # Metrics for this dataset
         results.append({
@@ -155,4 +197,4 @@ interface = gr.Interface(
     )
 )
 
-interface.launch(debug=True)
+interface.launch(debug=True)
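
Note: the evaluation loop in this diff calls three ranking-metric helpers, mean_reciprocal_rank, mean_average_precision, and ndcg_at_k, whose definitions are not part of the change. Below is a minimal sketch of what such helpers conventionally compute for a single query, using the standard IR definitions; the actual implementations in app.py are not shown in this commit and may differ.

import numpy as np

def mean_reciprocal_rank(relevance_labels, scores):
    # Rank candidates by descending score; return 1/rank of the first relevant document.
    order = np.argsort(scores)[::-1]
    for rank, idx in enumerate(order, start=1):
        if relevance_labels[idx] > 0:
            return 1.0 / rank
    return 0.0

def mean_average_precision(relevance_labels, scores):
    # Average precision of the ranked list for one query (averaged across queries by the caller).
    order = np.argsort(scores)[::-1]
    hits, precisions = 0, []
    for rank, idx in enumerate(order, start=1):
        if relevance_labels[idx] > 0:
            hits += 1
            precisions.append(hits / rank)
    return float(np.mean(precisions)) if precisions else 0.0

def ndcg_at_k(relevance_labels, scores, k=10):
    # DCG of the predicted ranking divided by DCG of the ideal ranking, truncated at k.
    order = np.argsort(scores)[::-1][:k]
    dcg = sum(relevance_labels[idx] / np.log2(rank + 1) for rank, idx in enumerate(order, start=1))
    ideal = sorted(relevance_labels, reverse=True)[:k]
    idcg = sum(rel / np.log2(rank + 1) for rank, rel in enumerate(ideal, start=1))
    return dcg / idcg if idcg > 0 else 0.0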
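The loop also assumes model.predict(pairs) returns one relevance score per (query, document) pair. That interface matches the sentence-transformers CrossEncoder API; the sketch below is a usage example under that assumption, and the checkpoint name is only illustrative, not necessarily the model app.py loads.

from sentence_transformers import CrossEncoder

# Assumed example checkpoint; app.py may load a different cross-encoder.
model = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")

query = "What does a cross-encoder score?"
candidate_texts = [
    "A cross-encoder jointly encodes a query and a candidate document and outputs a relevance score.",
    "Bananas are rich in potassium.",
]
pairs = [(query, doc) for doc in candidate_texts]
scores = model.predict(pairs)  # array of relevance scores, one per pair
print(scores)

Under this reading, higher scores mean more relevant, which is why the diffed code sorts with np.argsort(scores)[::-1] before taking the top 5 candidates for display.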