SearchGPT

Running

App Files Community

Shreyas094 committed on Jul 9, 2024

Commit

1f8184f

verified ·

1 Parent(s): f1dc47a

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -22

app.py CHANGED Viewed

@@ -318,12 +318,16 @@ def process_news(query, temperature, top_p, repetition_penalty, news_source):
                 "content": clean_content,
                 "summary": full_summary,
                 "cleaned_summary": cleaned_summary,
-                "relevance_score": relevance_score  # Ensure this line is present
             }
             processed_articles.append(processed_article)
         except Exception as e:
             print(f"Error processing article: {str(e)}")
     if not processed_articles:
         return f"Failed to process any news articles from {news_source}. Please try again or check the summarization process."
@@ -347,7 +351,11 @@ def process_news(query, temperature, top_p, repetition_penalty, news_source):
         # Update news_database for excel export
         global news_database
-        news_database.extend(processed_articles)
         return f"Processed and added {len(processed_articles)} news articles from {news_source} to the database."
     except Exception as e:
@@ -454,11 +462,15 @@ def fetch_golomt_bank_news(num_results=10):
 def export_news_to_excel():
     global news_database
-    df = pd.DataFrame(news_database)
-    # Use the cleaned summary for the Excel export
-    df['summary'] = df['cleaned_summary']
-    df = df.drop(columns=['cleaned_summary'])  # Remove the extra column
     # Ensure relevance_score is present and convert to float
     if 'relevance_score' not in df.columns:
@@ -466,28 +478,22 @@ def export_news_to_excel():
     else:
         df['relevance_score'] = pd.to_numeric(df['relevance_score'], errors='coerce').fillna(0.0)
     # Reorder columns to put relevance_score after summary
     columns = ['published_date', 'title', 'url', 'content', 'summary', 'relevance_score']
-    df = df[columns]
     with NamedTemporaryFile(delete=False, suffix='.xlsx') as tmp:
         excel_path = tmp.name
-        # First, save the DataFrame to Excel
         df.to_excel(excel_path, index=False, engine='openpyxl')
-        # Then, load the workbook and modify the relevance_score column
-        wb = load_workbook(excel_path)
-        ws = wb.active
-        for row in dataframe_to_rows(df, index=False, header=True):
-            ws.append(row)
-        # Format the relevance_score column as numbers
-        for cell in ws['F'][1:]:  # Assuming relevance_score is in column F
-            cell.number_format = '0.00'
-        wb.save(excel_path)
     return excel_path

                 "content": clean_content,
                 "summary": full_summary,
                 "cleaned_summary": cleaned_summary,
+                "relevance_score": relevance_score
             }
             processed_articles.append(processed_article)
         except Exception as e:
             print(f"Error processing article: {str(e)}")
+    # Debug print
+    print("Processed articles:")
+    for article in processed_articles:
+        print(f"Title: {article['title']}, Score: {article['relevance_score']}")
     if not processed_articles:
         return f"Failed to process any news articles from {news_source}. Please try again or check the summarization process."
         # Update news_database for excel export
         global news_database
+        news_database = processed_articles  # Directly assign the processed articles
+        print("Updated news_database:")
+        for article in news_database:
+            print(f"Title: {article['title']}, Score: {article['relevance_score']}")
         return f"Processed and added {len(processed_articles)} news articles from {news_source} to the database."
     except Exception as e:
 def export_news_to_excel():
     global news_database
+    if not news_database:
+        return "No articles to export. Please fetch news first."
+    print("Exporting the following articles:")
+    for article in news_database:
+        print(f"Title: {article['title']}, Score: {article.get('relevance_score', 'N/A')}")
+    df = pd.DataFrame(news_database)
     # Ensure relevance_score is present and convert to float
     if 'relevance_score' not in df.columns:
     else:
         df['relevance_score'] = pd.to_numeric(df['relevance_score'], errors='coerce').fillna(0.0)
+    # Use the cleaned summary for the Excel export
+    if 'cleaned_summary' in df.columns:
+        df['summary'] = df['cleaned_summary']
+        df = df.drop(columns=['cleaned_summary'])
     # Reorder columns to put relevance_score after summary
     columns = ['published_date', 'title', 'url', 'content', 'summary', 'relevance_score']
+    df = df[[col for col in columns if col in df.columns]]
+    print("Final DataFrame before export:")
+    print(df[['title', 'relevance_score']])
     with NamedTemporaryFile(delete=False, suffix='.xlsx') as tmp:
         excel_path = tmp.name
         df.to_excel(excel_path, index=False, engine='openpyxl')
+        print(f"Excel file saved to: {excel_path}")
     return excel_path