# v1shal's picture
# first_commit
# b396e94
# import json
# import time
# from utils.news_extraction_api import fetch_articles
# from utils.news_summarisation import summarize_text
# from utils.news_sentiment import analyze_sentiment
# from utils.topic_extraction import preprocess_text, train_lda, extract_topic_words
# from utils.comparative_analysis import comparative_sentiment_analysis
# from utils.text_to_speech import text_to_speech
# def main():
# company = input("Enter the company name for analysis: ").strip()
# # Extract news articles
# start_time = time.time()
# articles = fetch_articles(company, num_articles=2) # Fetch 2 articles
# extraction_time = time.time() - start_time
# print(f"✅ Articles extracted in {extraction_time:.2f} seconds")
# if not articles:
# print("⚠️ No news articles found. Try a different company.")
# return
# articles_data = []
# all_topics = [] # Collect all topics for better analysis
# for article in articles:
# text = article.get("content", "").strip()
# if not text:
# print(f"⚠️ Skipping article '{article.get('title', 'No Title')}' due to missing content.")
# continue
# # Perform sentiment analysis
# start_time = time.time()
# sentiment_result = analyze_sentiment([text])
# sentiment = sentiment_result.get("Predicted Sentiment", ["Unknown"])[0]
# sentiment_time = time.time() - start_time
# print(f"✅ Sentiment analysis completed in {sentiment_time:.2f} seconds")
# # Summarize the article
# start_time = time.time()
# summary = summarize_text(text)
# summary_time = time.time() - start_time
# print(f"✅ Summary generation completed in {summary_time:.2f} seconds")
# # Extract topics
# start_time = time.time()
# preprocessed_text = preprocess_text([text])
# if not preprocessed_text:
# print(f"⚠️ No meaningful text extracted for LDA topic modeling in '{article.get('title', 'No Title')}'.")
# topic_words = []
# else:
# lda_model, dictionary = train_lda(preprocessed_text)
# topic_words = extract_topic_words(lda_model)
# topic_time = time.time() - start_time
# print(f"✅ Topic extraction completed in {topic_time:.2f} seconds")
# # Store processed data
# articles_data.append({
# "Title": article.get("title", "No Title"),
# "Summary": summary,
# "Sentiment": sentiment,
# "Topics": topic_words if topic_words else []
# })
# # Collect topics for comparative analysis
# if topic_words:
# all_topics.extend(topic_words)
# # Ensure articles_data is not empty before analysis
# if not articles_data:
# print("⚠️ No valid articles with content were processed.")
# return
# # Perform comparative sentiment analysis
# start_time = time.time()
# analysis_result = comparative_sentiment_analysis(company, articles_data)
# analysis_time = time.time() - start_time
# print(f"✅ Comparative sentiment analysis completed in {analysis_time:.2f} seconds")
# # Correctly extract "Comparative Sentiment Score"
# comparative_score = analysis_result.get("Comparative Sentiment Score", {})
# sentiment_distribution = comparative_score.get("Sentiment Distribution", {})
# coverage_differences = comparative_score.get("Coverage Differences", {})
# topic_overlap = comparative_score.get("Topic Overlap", [])
# # Debugging check
# if not sentiment_distribution:
# print("⚠️ No sentiment distribution detected.")
# if not coverage_differences:
# print("⚠️ No coverage differences found.")
# if not topic_overlap:
# print("⚠️ No topic overlap detected among articles.")
# # Final sentiment summary
# final_sentiment_analysis = analysis_result.get("Final Sentiment Analysis", "Analysis could not be completed.")
# # Generate summary speech
# start_time = time.time()
# final_summary = f"{company}’s latest news coverage is mostly {final_sentiment_analysis}."
# audio_file = text_to_speech(final_summary)
# audio_time = time.time() - start_time
# print(f"✅ Summary speech generation completed in {audio_time:.2f} seconds")
# # Construct final JSON output
# output = {
# "Company": company,
# "Articles": articles_data,
# "Comparative Sentiment Score": {
# "Sentiment Distribution": sentiment_distribution,
# "Coverage Differences": coverage_differences,
# "Topic Overlap": topic_overlap
# },
# "Extracted Topics": list(set(all_topics)), # Unique topics across articles
# "Final Sentiment Analysis": final_summary,
# "Audio": f"[Play {audio_file}]"
# }
# # Print JSON output
# print(json.dumps(output, indent=4, ensure_ascii=False))
# # Save JSON output
# with open(f"{company}_news_analysis.json", "w", encoding="utf-8") as json_file:
# json.dump(output, json_file, indent=4, ensure_ascii=False)
# if __name__ == "__main__":
# main()
import json
import time
from utils.news_extraction_api import extract_news
from utils.news_summarisation import summarize_text
from utils.news_sentiment import analyze_sentiment
from utils.topic_extraction import preprocess_text, train_lda, extract_topic_words
from utils.comparative_analysis import comparative_sentiment_analysis
from utils.text_to_speech import text_to_speech
def analyze_company_news(company):
    """Run the full news-analysis pipeline for *company*.

    Steps: fetch articles, sentiment-analyse their contents in one batch,
    summarize and topic-model each article individually, run a comparative
    sentiment analysis across all articles, and synthesize a spoken summary.

    Args:
        company: Company name to search news for.

    Returns:
        dict: Either ``{"message": ...}`` when no usable articles were found,
        or the full report with keys ``Company``, ``Articles``,
        ``Comparative Sentiment Score`` and ``Audio``.
    """
    articles = extract_news(company)
    if not articles:
        return {"message": "No news articles found. Try a different company."}

    # Keep only articles that actually have non-empty text; articles missing
    # the "content" key would otherwise raise KeyError below.
    valid_articles = []
    texts = []
    for article in articles:
        text = article.get("content", "").strip()
        if text:
            valid_articles.append(article)
            texts.append(text)
    if not texts:
        return {"message": "No valid articles with content were found."}

    # Sentiment analysis is batched: one call for all article texts.
    sentiment_results = analyze_sentiment(texts)

    articles_data = []  # One processed entry per valid article
    for article, text, sentiment in zip(
        valid_articles, texts, sentiment_results["Predicted Sentiment"]
    ):
        summary = summarize_text(text)

        # Topic extraction can yield no tokens (e.g. stop-words only);
        # guard it so train_lda is not called on an empty corpus.
        preprocessed_text = preprocess_text([text])
        if preprocessed_text:
            lda_model, _dictionary = train_lda(preprocessed_text)
            topic_words = extract_topic_words(lda_model)
        else:
            topic_words = []

        articles_data.append({
            "Title": article.get("title", "No Title"),
            "Summary": summary,
            "Sentiment": sentiment,
            "Topics": topic_words,
        })

    # Cross-article comparison and final spoken summary.
    analysis_result = comparative_sentiment_analysis(company, articles_data)
    final_summary = f"{company}’s latest news coverage is mostly {analysis_result['Final Sentiment Analysis']}."
    audio_file = text_to_speech(final_summary)  # Generate TTS audio file

    return {
        "Company": company,
        "Articles": articles_data,
        "Comparative Sentiment Score": analysis_result,
        "Audio": f"[Play {audio_file}]",  # Playable reference for the UI
    }
if __name__ == "__main__":
    # Interactive entry point: prompt for a company, run the pipeline,
    # and pretty-print the resulting report as JSON.
    target_company = input("Enter the company name for analysis: ").strip()
    report = analyze_company_news(target_company)
    print(json.dumps(report, indent=4, ensure_ascii=False))