# import json | |
# import time | |
# from utils.news_extraction_api import fetch_articles | |
# from utils.news_summarisation import summarize_text | |
# from utils.news_sentiment import analyze_sentiment | |
# from utils.topic_extraction import preprocess_text, train_lda, extract_topic_words | |
# from utils.comparative_analysis import comparative_sentiment_analysis | |
# from utils.text_to_speech import text_to_speech | |
# def main(): | |
# company = input("Enter the company name for analysis: ").strip() | |
# # Extract news articles | |
# start_time = time.time() | |
# articles = fetch_articles(company, num_articles=2) # Fetch 2 articles | |
# extraction_time = time.time() - start_time | |
# print(f"✅ Articles extracted in {extraction_time:.2f} seconds") | |
# if not articles: | |
# print("⚠️ No news articles found. Try a different company.") | |
# return | |
# articles_data = [] | |
# all_topics = [] # Collect all topics for better analysis | |
# for article in articles: | |
# text = article.get("content", "").strip() | |
# if not text: | |
# print(f"⚠️ Skipping article '{article.get('title', 'No Title')}' due to missing content.") | |
# continue | |
# # Perform sentiment analysis | |
# start_time = time.time() | |
# sentiment_result = analyze_sentiment([text]) | |
# sentiment = sentiment_result.get("Predicted Sentiment", ["Unknown"])[0] | |
# sentiment_time = time.time() - start_time | |
# print(f"✅ Sentiment analysis completed in {sentiment_time:.2f} seconds") | |
# # Summarize the article | |
# start_time = time.time() | |
# summary = summarize_text(text) | |
# summary_time = time.time() - start_time | |
# print(f"✅ Summary generation completed in {summary_time:.2f} seconds") | |
# # Extract topics | |
# start_time = time.time() | |
# preprocessed_text = preprocess_text([text]) | |
# if not preprocessed_text: | |
# print(f"⚠️ No meaningful text extracted for LDA topic modeling in '{article.get('title', 'No Title')}'.") | |
# topic_words = [] | |
# else: | |
# lda_model, dictionary = train_lda(preprocessed_text) | |
# topic_words = extract_topic_words(lda_model) | |
# topic_time = time.time() - start_time | |
# print(f"✅ Topic extraction completed in {topic_time:.2f} seconds") | |
# # Store processed data | |
# articles_data.append({ | |
# "Title": article.get("title", "No Title"), | |
# "Summary": summary, | |
# "Sentiment": sentiment, | |
# "Topics": topic_words if topic_words else [] | |
# }) | |
# # Collect topics for comparative analysis | |
# if topic_words: | |
# all_topics.extend(topic_words) | |
# # Ensure articles_data is not empty before analysis | |
# if not articles_data: | |
# print("⚠️ No valid articles with content were processed.") | |
# return | |
# # Perform comparative sentiment analysis | |
# start_time = time.time() | |
# analysis_result = comparative_sentiment_analysis(company, articles_data) | |
# analysis_time = time.time() - start_time | |
# print(f"✅ Comparative sentiment analysis completed in {analysis_time:.2f} seconds") | |
# # Correctly extract "Comparative Sentiment Score" | |
# comparative_score = analysis_result.get("Comparative Sentiment Score", {}) | |
# sentiment_distribution = comparative_score.get("Sentiment Distribution", {}) | |
# coverage_differences = comparative_score.get("Coverage Differences", {}) | |
# topic_overlap = comparative_score.get("Topic Overlap", []) | |
# # Debugging check | |
# if not sentiment_distribution: | |
# print("⚠️ No sentiment distribution detected.") | |
# if not coverage_differences: | |
# print("⚠️ No coverage differences found.") | |
# if not topic_overlap: | |
# print("⚠️ No topic overlap detected among articles.") | |
# # Final sentiment summary | |
# final_sentiment_analysis = analysis_result.get("Final Sentiment Analysis", "Analysis could not be completed.") | |
# # Generate summary speech | |
# start_time = time.time() | |
# final_summary = f"{company}’s latest news coverage is mostly {final_sentiment_analysis}." | |
# audio_file = text_to_speech(final_summary) | |
# audio_time = time.time() - start_time | |
# print(f"✅ Summary speech generation completed in {audio_time:.2f} seconds") | |
# # Construct final JSON output | |
# output = { | |
# "Company": company, | |
# "Articles": articles_data, | |
# "Comparative Sentiment Score": { | |
# "Sentiment Distribution": sentiment_distribution, | |
# "Coverage Differences": coverage_differences, | |
# "Topic Overlap": topic_overlap | |
# }, | |
# "Extracted Topics": list(set(all_topics)), # Unique topics across articles | |
# "Final Sentiment Analysis": final_summary, | |
# "Audio": f"[Play {audio_file}]" | |
# } | |
# # Print JSON output | |
# print(json.dumps(output, indent=4, ensure_ascii=False)) | |
# # Save JSON output | |
# with open(f"{company}_news_analysis.json", "w", encoding="utf-8") as json_file: | |
# json.dump(output, json_file, indent=4, ensure_ascii=False) | |
# if __name__ == "__main__": | |
# main() | |
import json | |
import time | |
from utils.news_extraction_api import extract_news | |
from utils.news_summarisation import summarize_text | |
from utils.news_sentiment import analyze_sentiment | |
from utils.topic_extraction import preprocess_text, train_lda, extract_topic_words | |
from utils.comparative_analysis import comparative_sentiment_analysis | |
from utils.text_to_speech import text_to_speech | |
def analyze_company_news(company):
    """Run the full news-analysis pipeline for *company*.

    Fetches articles, batch-scores sentiment, summarises each article,
    extracts per-article topics via LDA, runs a comparative sentiment
    analysis across all articles, and renders the final verdict to speech.

    Args:
        company: Company name to search news for.

    Returns:
        dict: Report with keys "Company", "Articles",
        "Comparative Sentiment Score", "Final Sentiment Analysis", and
        "Audio"; or a {"message": ...} dict when no usable articles exist.
    """
    articles = extract_news(company)
    if not articles:
        return {"message": "No news articles found. Try a different company."}

    # Drop articles without usable text up front so the batched sentiment
    # results stay aligned 1:1 with the articles we actually process.
    articles = [a for a in articles if a.get("content", "").strip()]
    if not articles:
        return {"message": "No news articles found. Try a different company."}

    # Sentiment is analyzed for all article bodies in a single batch call.
    texts = [article["content"] for article in articles]
    sentiment_results = analyze_sentiment(texts)

    articles_data = []  # one processed entry per article
    for article, sentiment in zip(articles, sentiment_results["Predicted Sentiment"]):
        summary = summarize_text(article["content"])

        # Topic modeling can yield nothing for very short or stop-word-only
        # texts; guard so train_lda never receives an empty corpus.
        preprocessed_text = preprocess_text([article["content"]])
        if preprocessed_text:
            lda_model, _dictionary = train_lda(preprocessed_text)
            topic_words = extract_topic_words(lda_model)
        else:
            topic_words = []

        articles_data.append({
            "Title": article.get("title", "No Title"),
            "Summary": summary,
            "Sentiment": sentiment,
            "Topics": topic_words,
        })

    # Cross-article comparison; the result is assumed to include a
    # 'Final Sentiment Analysis' key (per the helper's contract here).
    analysis_result = comparative_sentiment_analysis(company, articles_data)

    final_summary = f"{company}’s latest news coverage is mostly {analysis_result['Final Sentiment Analysis']}."
    audio_file = text_to_speech(final_summary)  # generate TTS audio file

    return {
        "Company": company,
        "Articles": articles_data,
        "Comparative Sentiment Score": analysis_result,
        # Restored top-level summary key (present in the prior version but
        # previously computed and then dropped from the output).
        "Final Sentiment Analysis": final_summary,
        "Audio": f"[Play {audio_file}]",  # playable reference for the UI
    }
if __name__ == "__main__":
    # Interactive entry point: prompt for a company, run the pipeline,
    # and pretty-print the resulting report as UTF-8 JSON.
    target_company = input("Enter the company name for analysis: ").strip()
    report = analyze_company_news(target_company)
    print(json.dumps(report, indent=4, ensure_ascii=False))