"""Company news analysis pipeline.

Fetches news articles for a company, then per article: summarizes the text,
scores sentiment, and extracts LDA topic words. Finally runs a comparative
sentiment analysis across articles and voices a one-line verdict via TTS.
"""

import json
import time

from utils.comparative_analysis import comparative_sentiment_analysis
from utils.news_extraction_api import extract_news
from utils.news_sentiment import analyze_sentiment
from utils.news_summarisation import summarize_text
from utils.text_to_speech import text_to_speech
from utils.topic_extraction import extract_topic_words, preprocess_text, train_lda


def analyze_company_news(company):
    """Run the full news-analysis pipeline for *company*.

    Parameters
    ----------
    company : str
        Company name to search news coverage for.

    Returns
    -------
    dict
        ``{"message": ...}`` when no articles were found; otherwise the full
        report: per-article title/summary/sentiment/topics, the comparative
        sentiment analysis result, and a playable reference to the generated
        audio file.
    """
    articles = extract_news(company)
    if not articles:
        return {"message": "No news articles found. Try a different company."}

    # Sentiment is analyzed in one batched call over all article bodies.
    # .get with a default guards against articles missing a "content" key.
    texts = [article.get("content", "") for article in articles]
    sentiment_results = analyze_sentiment(texts)

    articles_data = []
    for article, sentiment in zip(articles, sentiment_results["Predicted Sentiment"]):
        content = article.get("content", "")
        summary = summarize_text(content)

        # Per-article topic modeling; preprocess_text expects a list of
        # documents, and the LDA dictionary returned by train_lda is unused.
        preprocessed_text = preprocess_text([content])
        lda_model, _dictionary = train_lda(preprocessed_text)
        topic_words = extract_topic_words(lda_model)

        articles_data.append({
            "Title": article.get("title", "No Title"),
            "Summary": summary,
            "Sentiment": sentiment,
            "Topics": topic_words,
        })

    # Cross-article comparison (distribution, coverage differences, overlap).
    analysis_result = comparative_sentiment_analysis(company, articles_data)

    # Voice a one-line overall verdict for the report.
    final_summary = f"{company}’s latest news coverage is mostly {analysis_result['Final Sentiment Analysis']}."
    audio_file = text_to_speech(final_summary)

    return {
        "Company": company,
        "Articles": articles_data,
        "Comparative Sentiment Score": analysis_result,
        "Audio": f"[Play {audio_file}]",  # playable reference for the UI
    }


if __name__ == "__main__":
    company = input("Enter the company name for analysis: ").strip()
    result = analyze_company_news(company)
    print(json.dumps(result, indent=4, ensure_ascii=False))