Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -225,13 +225,16 @@ def fetch_content(url, query):
|
|
225 |
|
226 |
def calculate_relevance_score(page_content, query, co, model_type='english'):
|
227 |
try:
|
228 |
-
if not page_content:
|
|
|
229 |
return 0
|
230 |
-
|
231 |
model = 'embed-english-v3.0' if model_type == 'english' else 'embed-multilingual-v3.0'
|
232 |
-
|
233 |
-
page_embedding =
|
234 |
-
query_embedding =
|
|
|
|
|
|
|
235 |
score = cosine_similarity([query_embedding], [page_embedding])[0][0]
|
236 |
return score
|
237 |
except Exception as e:
|
@@ -245,41 +248,46 @@ def analyze_competitors(row, co, custom_url=None, country_code=None):
|
|
245 |
query = row['query']
|
246 |
our_url = normalize_url(row['page'])
|
247 |
model_type = row.get('model_type', 'english') # Assuming you store model_type per row
|
248 |
-
|
249 |
competitor_data = get_serp_results(query, country_code)
|
250 |
-
|
251 |
results = []
|
|
|
|
|
252 |
for data in competitor_data:
|
253 |
competitor_url = normalize_url(data['url'])
|
254 |
score = calculate_relevance_score(data['content'], query, co, model_type=model_type)
|
|
|
|
|
|
|
255 |
results.append({
|
256 |
'Position': data['position'],
|
257 |
'URL': competitor_url,
|
258 |
'Score': score,
|
259 |
-
'is_our_url':
|
260 |
})
|
261 |
-
|
262 |
# Look up "Our Score" in the relevancy scores cached in session state (defaults to 0 if absent)
|
263 |
our_score = st.session_state.relevancy_scores.get(our_url, 0)
|
264 |
-
|
265 |
-
if not
|
266 |
results.append({
|
267 |
'Position': len(results) + 1,
|
268 |
'URL': f"{our_url} (Our URL)",
|
269 |
'Score': our_score,
|
270 |
'is_our_url': True
|
271 |
})
|
272 |
-
|
273 |
# Sort results by position in ascending order
|
274 |
results = sorted(results, key=lambda x: x['Position'])
|
275 |
-
|
276 |
# Create DataFrame
|
277 |
results_df = pd.DataFrame(results)
|
278 |
results_df['Position'] = results_df['Position'].astype(int)
|
279 |
-
|
280 |
# Keep only the columns we want to display
|
281 |
results_df = results_df[['Position', 'URL', 'Score']]
|
282 |
-
|
283 |
return results_df
|
284 |
|
285 |
def show_competitor_analysis(row, co, country_code):
|
@@ -407,7 +415,6 @@ def calculate_relevancy_scores(df, model_type):
|
|
407 |
if pd.isna(row['relevancy_score']) or row['relevancy_score'] == 0:
|
408 |
score = calculate_single_relevancy(row)
|
409 |
df.at[index, 'relevancy_score'] = score
|
410 |
-
st.session_state.relevancy_scores[normalize_url(row['page'])] = score
|
411 |
return df
|
412 |
|
413 |
# -------------
|
@@ -512,6 +519,7 @@ def calculate_single_relevancy(row):
|
|
512 |
query = row['query']
|
513 |
model_type = st.session_state.get('model_type_selector', 'english') # Retrieve from session state
|
514 |
score = calculate_relevance_score(page_content, query, co, model_type=model_type)
|
|
|
515 |
return score
|
516 |
|
517 |
def compare_with_top_result(row, co, country_code):
|
|
|
225 |
|
226 |
def calculate_relevance_score(page_content, query, co, model_type='english'):
|
227 |
try:
|
228 |
+
if not page_content.strip():
|
229 |
+
st.warning("Page content is empty. Cannot calculate relevance score.")
|
230 |
return 0
|
|
|
231 |
model = 'embed-english-v3.0' if model_type == 'english' else 'embed-multilingual-v3.0'
|
232 |
+
embeddings = co.embed(texts=[page_content, query], model=model, input_type=['search_document', 'search_query'])
|
233 |
+
page_embedding = embeddings.embeddings[0]
|
234 |
+
query_embedding = embeddings.embeddings[1]
|
235 |
+
if not any(page_embedding) or not any(query_embedding):
|
236 |
+
st.warning("One of the embeddings is empty. Returning a score of 0.")
|
237 |
+
return 0
|
238 |
score = cosine_similarity([query_embedding], [page_embedding])[0][0]
|
239 |
return score
|
240 |
except Exception as e:
|
|
|
248 |
query = row['query']
|
249 |
our_url = normalize_url(row['page'])
|
250 |
model_type = row.get('model_type', 'english') # Assuming you store model_type per row
|
251 |
+
|
252 |
competitor_data = get_serp_results(query, country_code)
|
253 |
+
|
254 |
results = []
|
255 |
+
our_url_found = False # Flag to check if our URL is in the results
|
256 |
+
|
257 |
for data in competitor_data:
|
258 |
competitor_url = normalize_url(data['url'])
|
259 |
score = calculate_relevance_score(data['content'], query, co, model_type=model_type)
|
260 |
+
is_our = competitor_url == our_url
|
261 |
+
if is_our:
|
262 |
+
our_url_found = True
|
263 |
results.append({
|
264 |
'Position': data['position'],
|
265 |
'URL': competitor_url,
|
266 |
'Score': score,
|
267 |
+
'is_our_url': is_our
|
268 |
})
|
269 |
+
|
270 |
# Look up "Our Score" in the relevancy scores cached in session state (defaults to 0 if absent)
|
271 |
our_score = st.session_state.relevancy_scores.get(our_url, 0)
|
272 |
+
|
273 |
+
if not our_url_found:
|
274 |
results.append({
|
275 |
'Position': len(results) + 1,
|
276 |
'URL': f"{our_url} (Our URL)",
|
277 |
'Score': our_score,
|
278 |
'is_our_url': True
|
279 |
})
|
280 |
+
|
281 |
# Sort results by position in ascending order
|
282 |
results = sorted(results, key=lambda x: x['Position'])
|
283 |
+
|
284 |
# Create DataFrame
|
285 |
results_df = pd.DataFrame(results)
|
286 |
results_df['Position'] = results_df['Position'].astype(int)
|
287 |
+
|
288 |
# Keep only the columns we want to display
|
289 |
results_df = results_df[['Position', 'URL', 'Score']]
|
290 |
+
|
291 |
return results_df
|
292 |
|
293 |
def show_competitor_analysis(row, co, country_code):
|
|
|
415 |
if pd.isna(row['relevancy_score']) or row['relevancy_score'] == 0:
|
416 |
score = calculate_single_relevancy(row)
|
417 |
df.at[index, 'relevancy_score'] = score
|
|
|
418 |
return df
|
419 |
|
420 |
# -------------
|
|
|
519 |
query = row['query']
|
520 |
model_type = st.session_state.get('model_type_selector', 'english') # Retrieve from session state
|
521 |
score = calculate_relevance_score(page_content, query, co, model_type=model_type)
|
522 |
+
st.session_state.relevancy_scores[normalize_url(row['page'])] = score # Ensure score is stored
|
523 |
return score
|
524 |
|
525 |
def compare_with_top_result(row, co, country_code):
|