Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -318,12 +318,16 @@ def process_news(query, temperature, top_p, repetition_penalty, news_source):
|
|
318 |
"content": clean_content,
|
319 |
"summary": full_summary,
|
320 |
"cleaned_summary": cleaned_summary,
|
321 |
-
"relevance_score": relevance_score
|
322 |
}
|
323 |
processed_articles.append(processed_article)
|
324 |
except Exception as e:
|
325 |
print(f"Error processing article: {str(e)}")
|
326 |
|
|
|
|
|
|
|
|
|
327 |
|
328 |
if not processed_articles:
|
329 |
return f"Failed to process any news articles from {news_source}. Please try again or check the summarization process."
|
@@ -347,7 +351,11 @@ def process_news(query, temperature, top_p, repetition_penalty, news_source):
|
|
347 |
|
348 |
# Update news_database for excel export
|
349 |
global news_database
|
350 |
-
news_database
|
|
|
|
|
|
|
|
|
351 |
|
352 |
return f"Processed and added {len(processed_articles)} news articles from {news_source} to the database."
|
353 |
except Exception as e:
|
@@ -454,11 +462,15 @@ def fetch_golomt_bank_news(num_results=10):
|
|
454 |
|
455 |
def export_news_to_excel():
|
456 |
global news_database
|
457 |
-
df = pd.DataFrame(news_database)
|
458 |
|
459 |
-
|
460 |
-
|
461 |
-
|
|
|
|
|
|
|
|
|
|
|
462 |
|
463 |
# Ensure relevance_score is present and convert to float
|
464 |
if 'relevance_score' not in df.columns:
|
@@ -466,28 +478,22 @@ def export_news_to_excel():
|
|
466 |
else:
|
467 |
df['relevance_score'] = pd.to_numeric(df['relevance_score'], errors='coerce').fillna(0.0)
|
468 |
|
|
|
|
|
|
|
|
|
|
|
469 |
# Reorder columns to put relevance_score after summary
|
470 |
columns = ['published_date', 'title', 'url', 'content', 'summary', 'relevance_score']
|
471 |
-
df = df[columns]
|
|
|
|
|
|
|
472 |
|
473 |
with NamedTemporaryFile(delete=False, suffix='.xlsx') as tmp:
|
474 |
excel_path = tmp.name
|
475 |
-
|
476 |
-
# First, save the DataFrame to Excel
|
477 |
df.to_excel(excel_path, index=False, engine='openpyxl')
|
478 |
-
|
479 |
-
# Then, load the workbook and modify the relevance_score column
|
480 |
-
wb = load_workbook(excel_path)
|
481 |
-
ws = wb.active
|
482 |
-
|
483 |
-
for row in dataframe_to_rows(df, index=False, header=True):
|
484 |
-
ws.append(row)
|
485 |
-
|
486 |
-
# Format the relevance_score column as numbers
|
487 |
-
for cell in ws['F'][1:]: # Assuming relevance_score is in column F
|
488 |
-
cell.number_format = '0.00'
|
489 |
-
|
490 |
-
wb.save(excel_path)
|
491 |
|
492 |
return excel_path
|
493 |
|
|
|
318 |
"content": clean_content,
|
319 |
"summary": full_summary,
|
320 |
"cleaned_summary": cleaned_summary,
|
321 |
+
"relevance_score": relevance_score
|
322 |
}
|
323 |
processed_articles.append(processed_article)
|
324 |
except Exception as e:
|
325 |
print(f"Error processing article: {str(e)}")
|
326 |
|
327 |
+
# Debug print
|
328 |
+
print("Processed articles:")
|
329 |
+
for article in processed_articles:
|
330 |
+
print(f"Title: {article['title']}, Score: {article['relevance_score']}")
|
331 |
|
332 |
if not processed_articles:
|
333 |
return f"Failed to process any news articles from {news_source}. Please try again or check the summarization process."
|
|
|
351 |
|
352 |
# Update news_database for excel export
|
353 |
global news_database
|
354 |
+
news_database = processed_articles # Directly assign the processed articles
|
355 |
+
|
356 |
+
print("Updated news_database:")
|
357 |
+
for article in news_database:
|
358 |
+
print(f"Title: {article['title']}, Score: {article['relevance_score']}")
|
359 |
|
360 |
return f"Processed and added {len(processed_articles)} news articles from {news_source} to the database."
|
361 |
except Exception as e:
|
|
|
462 |
|
463 |
def export_news_to_excel():
|
464 |
global news_database
|
|
|
465 |
|
466 |
+
if not news_database:
|
467 |
+
return "No articles to export. Please fetch news first."
|
468 |
+
|
469 |
+
print("Exporting the following articles:")
|
470 |
+
for article in news_database:
|
471 |
+
print(f"Title: {article['title']}, Score: {article.get('relevance_score', 'N/A')}")
|
472 |
+
|
473 |
+
df = pd.DataFrame(news_database)
|
474 |
|
475 |
# Ensure relevance_score is present and convert to float
|
476 |
if 'relevance_score' not in df.columns:
|
|
|
478 |
else:
|
479 |
df['relevance_score'] = pd.to_numeric(df['relevance_score'], errors='coerce').fillna(0.0)
|
480 |
|
481 |
+
# Use the cleaned summary for the Excel export
|
482 |
+
if 'cleaned_summary' in df.columns:
|
483 |
+
df['summary'] = df['cleaned_summary']
|
484 |
+
df = df.drop(columns=['cleaned_summary'])
|
485 |
+
|
486 |
# Reorder columns to put relevance_score after summary
|
487 |
columns = ['published_date', 'title', 'url', 'content', 'summary', 'relevance_score']
|
488 |
+
df = df[[col for col in columns if col in df.columns]]
|
489 |
+
|
490 |
+
print("Final DataFrame before export:")
|
491 |
+
print(df[['title', 'relevance_score']])
|
492 |
|
493 |
with NamedTemporaryFile(delete=False, suffix='.xlsx') as tmp:
|
494 |
excel_path = tmp.name
|
|
|
|
|
495 |
df.to_excel(excel_path, index=False, engine='openpyxl')
|
496 |
+
print(f"Excel file saved to: {excel_path}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
497 |
|
498 |
return excel_path
|
499 |
|