Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -23,6 +23,9 @@ from langchain_core.runnables import RunnableParallel, RunnablePassthrough
|
|
23 |
from langchain_core.documents import Document
|
24 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
25 |
from sklearn.metrics.pairwise import cosine_similarity
|
|
|
|
|
|
|
26 |
|
27 |
|
28 |
huggingface_token = os.environ.get("HUGGINGFACE_TOKEN")
|
@@ -461,18 +464,30 @@ def export_news_to_excel():
|
|
461 |
if 'relevance_score' not in df.columns:
|
462 |
df['relevance_score'] = 0.0
|
463 |
else:
|
464 |
-
df['relevance_score'] = df['relevance_score'].
|
465 |
|
466 |
# Reorder columns to put relevance_score after summary
|
467 |
columns = ['published_date', 'title', 'url', 'content', 'summary', 'relevance_score']
|
468 |
df = df[columns]
|
469 |
|
470 |
-
# Format relevance_score to display 2 decimal places
|
471 |
-
df['relevance_score'] = df['relevance_score'].apply(lambda x: f"{x:.2f}")
|
472 |
-
|
473 |
with NamedTemporaryFile(delete=False, suffix='.xlsx') as tmp:
|
474 |
excel_path = tmp.name
|
475 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
476 |
|
477 |
return excel_path
|
478 |
|
|
|
23 |
from langchain_core.documents import Document
|
24 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
25 |
from sklearn.metrics.pairwise import cosine_similarity
|
26 |
+
from openpyxl import load_workbook
|
27 |
+
from openpyxl.utils.dataframe import dataframe_to_rows
|
28 |
+
|
29 |
|
30 |
|
31 |
huggingface_token = os.environ.get("HUGGINGFACE_TOKEN")
|
|
|
464 |
if 'relevance_score' not in df.columns:
|
465 |
df['relevance_score'] = 0.0
|
466 |
else:
|
467 |
+
df['relevance_score'] = pd.to_numeric(df['relevance_score'], errors='coerce').fillna(0.0)
|
468 |
|
469 |
# Reorder columns to put relevance_score after summary
|
470 |
columns = ['published_date', 'title', 'url', 'content', 'summary', 'relevance_score']
|
471 |
df = df[columns]
|
472 |
|
|
|
|
|
|
|
473 |
with NamedTemporaryFile(delete=False, suffix='.xlsx') as tmp:
|
474 |
excel_path = tmp.name
|
475 |
+
|
476 |
+
# First, save the DataFrame to Excel
|
477 |
+
df.to_excel(excel_path, index=False, engine='openpyxl')
|
478 |
+
|
479 |
+
# Then, load the workbook and modify the relevance_score column
|
480 |
+
wb = load_workbook(excel_path)
|
481 |
+
ws = wb.active
|
482 |
+
|
483 |
+
for row in dataframe_to_rows(df, index=False, header=True):
|
484 |
+
ws.append(row)
|
485 |
+
|
486 |
+
# Format the relevance_score column as numbers
|
487 |
+
for cell in ws['F'][1:]: # Assuming relevance_score is in column F
|
488 |
+
cell.number_format = '0.00'
|
489 |
+
|
490 |
+
wb.save(excel_path)
|
491 |
|
492 |
return excel_path
|
493 |
|