poemsforaphrodite committed on
Commit
474ca04
·
verified ·
1 Parent(s): 99665c0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -16
app.py CHANGED
@@ -225,13 +225,16 @@ def fetch_content(url, query):
225
 
226
  def calculate_relevance_score(page_content, query, co, model_type='english'):
227
  try:
228
- if not page_content:
 
229
  return 0
230
-
231
  model = 'embed-english-v3.0' if model_type == 'english' else 'embed-multilingual-v3.0'
232
-
233
- page_embedding = co.embed(texts=[page_content], model=model, input_type='search_document').embeddings[0]
234
- query_embedding = co.embed(texts=[query], model=model, input_type='search_query').embeddings[0]
 
 
 
235
  score = cosine_similarity([query_embedding], [page_embedding])[0][0]
236
  return score
237
  except Exception as e:
@@ -245,41 +248,46 @@ def analyze_competitors(row, co, custom_url=None, country_code=None):
245
  query = row['query']
246
  our_url = normalize_url(row['page'])
247
  model_type = row.get('model_type', 'english') # Assuming you store model_type per row
248
-
249
  competitor_data = get_serp_results(query, country_code)
250
-
251
  results = []
 
 
252
  for data in competitor_data:
253
  competitor_url = normalize_url(data['url'])
254
  score = calculate_relevance_score(data['content'], query, co, model_type=model_type)
 
 
 
255
  results.append({
256
  'Position': data['position'],
257
  'URL': competitor_url,
258
  'Score': score,
259
- 'is_our_url': competitor_url == our_url
260
  })
261
-
262
  # Retrieve "Our Score" from the main data table
263
  our_score = st.session_state.relevancy_scores.get(our_url, 0)
264
-
265
- if not any(r['is_our_url'] for r in results):
266
  results.append({
267
  'Position': len(results) + 1,
268
  'URL': f"{our_url} (Our URL)",
269
  'Score': our_score,
270
  'is_our_url': True
271
  })
272
-
273
  # Sort results by position in ascending order
274
  results = sorted(results, key=lambda x: x['Position'])
275
-
276
  # Create DataFrame
277
  results_df = pd.DataFrame(results)
278
  results_df['Position'] = results_df['Position'].astype(int)
279
-
280
  # Keep only the columns we want to display
281
  results_df = results_df[['Position', 'URL', 'Score']]
282
-
283
  return results_df
284
 
285
  def show_competitor_analysis(row, co, country_code):
@@ -407,7 +415,6 @@ def calculate_relevancy_scores(df, model_type):
407
  if pd.isna(row['relevancy_score']) or row['relevancy_score'] == 0:
408
  score = calculate_single_relevancy(row)
409
  df.at[index, 'relevancy_score'] = score
410
- st.session_state.relevancy_scores[normalize_url(row['page'])] = score
411
  return df
412
 
413
  # -------------
@@ -512,6 +519,7 @@ def calculate_single_relevancy(row):
512
  query = row['query']
513
  model_type = st.session_state.get('model_type_selector', 'english') # Retrieve from session state
514
  score = calculate_relevance_score(page_content, query, co, model_type=model_type)
 
515
  return score
516
 
517
  def compare_with_top_result(row, co, country_code):
 
225
 
226
  def calculate_relevance_score(page_content, query, co, model_type='english'):
227
  try:
228
+ if not page_content.strip():
229
+ st.warning("Page content is empty. Cannot calculate relevance score.")
230
  return 0
 
231
  model = 'embed-english-v3.0' if model_type == 'english' else 'embed-multilingual-v3.0'
232
+ embeddings = co.embed(texts=[page_content, query], model=model, input_type=['search_document', 'search_query'])
233
+ page_embedding = embeddings.embeddings[0]
234
+ query_embedding = embeddings.embeddings[1]
235
+ if not any(page_embedding) or not any(query_embedding):
236
+ st.warning("One of the embeddings is empty. Returning a score of 0.")
237
+ return 0
238
  score = cosine_similarity([query_embedding], [page_embedding])[0][0]
239
  return score
240
  except Exception as e:
 
248
  query = row['query']
249
  our_url = normalize_url(row['page'])
250
  model_type = row.get('model_type', 'english') # Assuming you store model_type per row
251
+
252
  competitor_data = get_serp_results(query, country_code)
253
+
254
  results = []
255
+ our_url_found = False # Flag to check if our URL is in the results
256
+
257
  for data in competitor_data:
258
  competitor_url = normalize_url(data['url'])
259
  score = calculate_relevance_score(data['content'], query, co, model_type=model_type)
260
+ is_our = competitor_url == our_url
261
+ if is_our:
262
+ our_url_found = True
263
  results.append({
264
  'Position': data['position'],
265
  'URL': competitor_url,
266
  'Score': score,
267
+ 'is_our_url': is_our
268
  })
269
+
270
  # Retrieve "Our Score" from the main data table
271
  our_score = st.session_state.relevancy_scores.get(our_url, 0)
272
+
273
+ if not our_url_found:
274
  results.append({
275
  'Position': len(results) + 1,
276
  'URL': f"{our_url} (Our URL)",
277
  'Score': our_score,
278
  'is_our_url': True
279
  })
280
+
281
  # Sort results by position in ascending order
282
  results = sorted(results, key=lambda x: x['Position'])
283
+
284
  # Create DataFrame
285
  results_df = pd.DataFrame(results)
286
  results_df['Position'] = results_df['Position'].astype(int)
287
+
288
  # Keep only the columns we want to display
289
  results_df = results_df[['Position', 'URL', 'Score']]
290
+
291
  return results_df
292
 
293
  def show_competitor_analysis(row, co, country_code):
 
415
  if pd.isna(row['relevancy_score']) or row['relevancy_score'] == 0:
416
  score = calculate_single_relevancy(row)
417
  df.at[index, 'relevancy_score'] = score
 
418
  return df
419
 
420
  # -------------
 
519
  query = row['query']
520
  model_type = st.session_state.get('model_type_selector', 'english') # Retrieve from session state
521
  score = calculate_relevance_score(page_content, query, co, model_type=model_type)
522
+ st.session_state.relevancy_scores[normalize_url(row['page'])] = score # Ensure score is stored
523
  return score
524
 
525
  def compare_with_top_result(row, co, country_code):