# Source: Hugging Face Space "Phoenix21/AiNewsV2" (status: Sleeping; file size: 11,256 bytes)
from datetime import datetime
import re
from langchain_core.messages import HumanMessage, SystemMessage
from langgraph.constants import Send
from LLMS import get_llm, get_planner
from LLMS.models import Section, Subsection
from state import BlogState, WorkerState

def _parse_news_items(content):
    """Parse the marker-based news text into a list of field dicts.

    The text is split into blocks on blank lines. A block that starts with
    ``TITLE:`` opens a new item whose lines may carry ``TITLE:``, ``SOURCE:``,
    ``URL:``, ``DESCRIPTION:`` and ``CONTENT:`` fields. Any other block is
    treated as a continuation of the current item's content.

    Args:
        content: Raw news text.

    Returns:
        A list of dicts keyed by ``title``, ``source``, ``url``,
        ``description`` and ``content`` (only the fields that were present).
    """
    field_markers = (
        ('TITLE:', 'title'),
        ('SOURCE:', 'source'),
        ('URL:', 'url'),
        ('DESCRIPTION:', 'description'),
        ('CONTENT:', 'content'),
    )
    news_items = []
    current_item = {}

    for raw_block in content.split('\n\n'):
        # Strip so that extra blank lines between items do not hide a
        # leading "TITLE:" marker behind stray newlines.
        content_block = raw_block.strip()
        if not content_block:
            continue

        if not content_block.startswith('TITLE:'):
            # Free text between items continues the current item's content.
            if 'content' in current_item:
                current_item['content'] += ' ' + content_block
            continue

        # A new item starts here; flush the previous one first.
        if 'title' in current_item:
            news_items.append(current_item)
        current_item = {}

        for line in content_block.split('\n'):
            for marker, key in field_markers:
                if line.startswith(marker):
                    # Slice off only the leading marker; str.replace would
                    # also delete the marker text if it recurs in the value.
                    current_item[key] = line[len(marker):].strip()
                    break
            else:
                # Un-prefixed line: keep it as part of the content instead
                # of silently dropping it (content may span several lines).
                if 'content' in current_item and line.strip():
                    current_item['content'] += ' ' + line.strip()

    # Add the last item
    if 'title' in current_item:
        news_items.append(current_item)

    return news_items


def _categorize_news_item(item):
    """Return ``'business'``, ``'research'`` or ``'tech'`` for a news item.

    Classification is a simple keyword search over the lower-cased title and
    description; business keywords take precedence over research keywords,
    and anything unmatched is treated as technology news.
    """
    business_keywords = ('business', 'market', 'company', 'investment', 'startup')
    research_keywords = ('research', 'study', 'paper', 'university')

    # Join with a space so a keyword cannot match across the title/description
    # boundary (e.g. "...start" + "up...").
    text = f"{item.get('title', '')} {item.get('description', '')}".lower()

    if any(kw in text for kw in business_keywords):
        return 'business'
    if any(kw in text for kw in research_keywords):
        return 'research'
    return 'tech'


def _build_subsections(items):
    """Convert parsed news items into ``Subsection`` objects."""
    return [
        Subsection(
            title=item['title'],
            # Tolerate items without SOURCE:/URL: lines so that one incomplete
            # item does not abort the whole plan.
            source=item.get('source', 'Unknown'),
            url=item.get('url', ''),
            content=f"{item.get('description', '')} {item.get('content', '')[:500]}..."
        )
        for item in items
    ]


def orchestrator(state: BlogState):
    """Generate the blog plan (sections and subsections) from the news items.

    Args:
        state: Graph state; ``state['content']`` holds the raw news text in
            the ``TITLE:``/``SOURCE:``/``URL:``/``DESCRIPTION:``/``CONTENT:``
            format.

    Returns:
        ``{"sections": [...]}`` with one ``Section`` per non-empty category
        (technology, business, research). If no items were parsed, a single
        general section is returned; if planning fails for any reason, a
        single fallback section without subsections is returned.
    """
    try:
        news_items = _parse_news_items(state['content'])

        # Group news items by category
        grouped = {'tech': [], 'business': [], 'research': []}
        for item in news_items:
            grouped[_categorize_news_item(item)].append(item)

        # (category key, section name, description, information), in the
        # order the sections should appear in the plan.
        section_specs = (
            (
                'tech',
                "AI Technology Developments",
                "Recent advancements in AI technology and applications",
                "Cover the latest developments in AI technology.",
            ),
            (
                'business',
                "AI in Business",
                "How AI is transforming industries and markets",
                "Focus on business applications and market trends in AI.",
            ),
            (
                'research',
                "AI Research and Studies",
                "Latest research findings and academic work in AI",
                "Cover recent research papers and studies in AI.",
            ),
        )

        sections = [
            Section(
                name=name,
                description=description,
                information=information,
                subsections=_build_subsections(grouped[key])
            )
            for key, name, description, information in section_specs
            if grouped[key]
        ]

        # Every parsed item lands in one of the categories above, so this
        # only triggers when nothing could be parsed from the content.
        if not sections:
            sections.append(Section(
                name="Latest AI News",
                description="Roundup of the latest AI news from around the web",
                information="Cover a range of AI news topics.",
                subsections=_build_subsections(news_items)
            ))

        return {"sections": sections}
    except Exception as e:
        # Deliberate best-effort boundary: never let planning crash the graph.
        print(f"Error in orchestrator: {e}")
        # Fallback plan if structured output fails
        fallback_sections = [
            Section(
                name="Latest AI Developments",
                description="Overview of recent AI advancements and research",
                information="Summarize the latest AI developments from the provided content.",
                subsections=[]
            )
        ]
        return {"sections": fallback_sections}

def llm_call(state: WorkerState):
    """Write one blog section, producing a subsection per news item.

    Reads ``state['section']``. When the section has subsections, the LLM is
    invoked once per subsection and each reply is placed under a ``###``
    heading with a source link. Otherwise the LLM is invoked once to write
    the whole section body from the section's name, description and
    information.

    Returns:
        ``{"completed_sections": [markdown]}`` where ``markdown`` is the
        section header followed by the generated body.
    """
    section = state['section']

    # Section header without ID tags, for cleaner markdown
    header = f"## {section.name}\n\n{section.description}\n"
    llm = get_llm()

    # No subsections: let the model write the full section in one call.
    if not section.subsections:
        whole_section = llm.invoke([
            SystemMessage(content="Write a blog section following the provided name, description, and information. Include no preamble. Use markdown formatting."),
            HumanMessage(content=f"Here is the section name: {section.name}\nDescription: {section.description}\nInformation: {section.information}")
        ])
        return {"completed_sections": [header + whole_section.content]}

    # One LLM call per news item; collect the rendered pieces and join once.
    rendered_parts = []
    for subsection in section.subsections:
        subsection_prompt = f"""
Write a detailed subsection about this AI news item:
Title: {subsection.title}
Source: {subsection.source}
URL: {subsection.url}

Content to summarize and expand on:
{subsection.content}

Keep your response focused on the news item and make it engaging. Use markdown formatting.
"""

        reply = llm.invoke([
            SystemMessage(content="You are writing a subsection for an AI news blog. Write in a professional but engaging style. Include key details and insights. Use markdown formatting."),
            HumanMessage(content=subsection_prompt)
        ])

        # Title heading, source attribution line, then the generated text.
        heading = f"### {subsection.title}\n\n"
        attribution = f"*Source: [{subsection.source}]({subsection.url})*\n\n"
        rendered_parts.append(heading + attribution + reply.content + "\n\n")

    return {"completed_sections": [header + "".join(rendered_parts)]}

def _heading_anchor(heading):
    """Return a link anchor for a markdown heading.

    Follows the common GitHub-style slug rules: lower-case the text, drop
    everything except word characters, hyphens and spaces, then turn spaces
    into hyphens. NOTE(review): the renderer that displays the final report
    is not visible here - confirm it generates heading ids the same way.
    """
    slug = re.sub(r'[^\w\- ]', '', heading.strip().lower())
    return slug.replace(' ', '-')


def _build_table_of_contents(report):
    """Build a two-level numbered table of contents from markdown headings.

    ``## `` headings become numbered sections and the ``### `` headings that
    follow them become indented ``i.j`` entries under that section.
    """
    toc_entries = ["## Table of Contents\n\n"]
    section_no = 0
    subsection_no = 0

    # Anchor the pattern to line starts: an unanchored '## ' also matches
    # inside '### ', which would list every subsection as a section and cut
    # each section's text off before its first subsection.
    heading_pattern = re.compile(r'^(#{2,3}) ([^\n]+)$', re.MULTILINE)

    # A single pass in document order keeps subsections attached to the right
    # section even when two sections share the same name.
    for match in heading_pattern.finditer(report):
        hashes, raw_name = match.groups()
        name = raw_name.strip()
        if not name:
            continue
        anchor = _heading_anchor(name)

        if hashes == '##':
            section_no += 1
            subsection_no = 0
            toc_entries.append(f"{section_no}. [{name}](#{anchor})\n")
        elif section_no:
            subsection_no += 1
            toc_entries.append(
                f"   {section_no}.{subsection_no}. [{name}](#{anchor})\n"
            )
        # A '###' heading before any '##' heading has no parent section to
        # be numbered under, so it is left out of the table of contents.

    return "".join(toc_entries)


def synthesizer(state: BlogState):
    """Synthesize the full blog post from the completed sections.

    Joins ``state["completed_sections"]`` into one markdown document and
    prepends a dated title, an LLM-written introduction and a hierarchical
    table of contents; a generated-on footer is appended.

    Returns:
        ``{"final_report": markdown}`` containing the assembled blog post.
    """
    # Format completed sections into a full blog post
    completed_report = "\n\n".join(state["completed_sections"])

    # Add title, date, and introduction
    today = datetime.now().strftime("%Y-%m-%d")
    blog_title = f"# AI News Roundup - {today}"

    # Generate a brief introduction
    llm = get_llm()
    intro = llm.invoke([
        SystemMessage(content="Write a brief introduction for an AI news roundup blog post. Keep it under 100 words. Be engaging and professional."),
        HumanMessage(content=f"Today's date is {today}. Write a brief introduction for an AI news roundup.")
    ])

    table_of_contents = _build_table_of_contents(completed_report)

    final_report = f"{blog_title}\n\n{intro.content}\n\n{table_of_contents}\n\n---\n\n{completed_report}\n\n---\n\n*This AI News Roundup was automatically generated on {today}.*"

    return {"final_report": final_report}

def assign_workers(state: BlogState):
    """Create one ``llm_call`` dispatch per planned section.

    Returns:
        A list of ``Send`` objects, each targeting the ``"llm_call"`` node
        with a ``{"section": ...}`` payload, in the order of
        ``state["sections"]``.
    """
    dispatches = []
    for planned_section in state["sections"]:
        # Each section gets its own worker payload so they can be written
        # independently of one another.
        dispatches.append(Send("llm_call", {"section": planned_section}))
    return dispatches