"""Company news analysis pipeline.

Fetches news articles for a company, then per article: summarizes the text,
scores sentiment, and extracts LDA topic words. Finally runs a comparative
sentiment analysis across articles and voices a one-line verdict via TTS.
"""

import json
import time

from utils.comparative_analysis import comparative_sentiment_analysis
from utils.news_extraction_api import extract_news
from utils.news_sentiment import analyze_sentiment
from utils.news_summarisation import summarize_text
from utils.text_to_speech import text_to_speech
from utils.topic_extraction import extract_topic_words, preprocess_text, train_lda


def analyze_company_news(company):
    """Run the full news-analysis pipeline for *company*.

    Parameters
    ----------
    company : str
        Company name to search news coverage for.

    Returns
    -------
    dict
        ``{"message": ...}`` when no articles were found; otherwise the full
        report: per-article title/summary/sentiment/topics, the comparative
        sentiment analysis result, and a playable reference to the generated
        audio file.
    """
    articles = extract_news(company)
    if not articles:
        return {"message": "No news articles found. Try a different company."}

    # Sentiment is analyzed in one batched call over all article bodies.
    # .get with a default guards against articles missing a "content" key.
    texts = [article.get("content", "") for article in articles]
    sentiment_results = analyze_sentiment(texts)

    articles_data = []
    for article, sentiment in zip(articles, sentiment_results["Predicted Sentiment"]):
        content = article.get("content", "")
        summary = summarize_text(content)

        # Per-article topic modeling; preprocess_text expects a list of
        # documents, and the LDA dictionary returned by train_lda is unused.
        preprocessed_text = preprocess_text([content])
        lda_model, _dictionary = train_lda(preprocessed_text)
        topic_words = extract_topic_words(lda_model)

        articles_data.append({
            "Title": article.get("title", "No Title"),
            "Summary": summary,
            "Sentiment": sentiment,
            "Topics": topic_words,
        })

    # Cross-article comparison (distribution, coverage differences, overlap).
    analysis_result = comparative_sentiment_analysis(company, articles_data)

    # Voice a one-line overall verdict for the report.
    final_summary = f"{company}’s latest news coverage is mostly {analysis_result['Final Sentiment Analysis']}."
    audio_file = text_to_speech(final_summary)

    return {
        "Company": company,
        "Articles": articles_data,
        "Comparative Sentiment Score": analysis_result,
        "Audio": f"[Play {audio_file}]",  # playable reference for the UI
    }


if __name__ == "__main__":
    company = input("Enter the company name for analysis: ").strip()
    result = analyze_company_news(company)
    print(json.dumps(result, indent=4, ensure_ascii=False))