File size: 11,256 Bytes
7516245
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
from datetime import datetime
import re
from langchain_core.messages import HumanMessage, SystemMessage
from langgraph.constants import Send
from LLMS import get_llm, get_planner
from LLMS.models import Section, Subsection
from state import BlogState, WorkerState

# Line prefixes recognised inside a "TITLE:" block, mapped to news-item keys.
_NEWS_FIELD_PREFIXES = (
    ('TITLE:', 'title'),
    ('SOURCE:', 'source'),
    ('URL:', 'url'),
    ('DESCRIPTION:', 'description'),
    ('CONTENT:', 'content'),
)

# Keyword lists are checked in this order; an item matching none is "tech".
_CATEGORY_KEYWORDS = (
    ('business', ('business', 'market', 'company', 'investment', 'startup')),
    ('research', ('research', 'study', 'paper', 'university')),
)

# (category key, section name, description, information), in output order.
_SECTION_SPECS = (
    (
        'tech',
        "AI Technology Developments",
        "Recent advancements in AI technology and applications",
        "Cover the latest developments in AI technology.",
    ),
    (
        'business',
        "AI in Business",
        "How AI is transforming industries and markets",
        "Focus on business applications and market trends in AI.",
    ),
    (
        'research',
        "AI Research and Studies",
        "Latest research findings and academic work in AI",
        "Cover recent research papers and studies in AI.",
    ),
)


def _parse_news_items(raw_content):
    """Parse blank-line separated TITLE:/SOURCE:/URL:/... blocks into dicts."""
    news_items = []
    current_item = {}

    for content_block in raw_content.split('\n\n'):
        if content_block.startswith('TITLE:'):
            # A new TITLE: block closes the item collected so far.
            if 'title' in current_item:
                news_items.append(current_item)
            current_item = {}

            for line in content_block.split('\n'):
                for prefix, key in _NEWS_FIELD_PREFIXES:
                    if line.startswith(prefix):
                        # Slice off only the leading tag; str.replace would
                        # also delete the tag text wherever it recurs in the
                        # value itself.
                        current_item[key] = line[len(prefix):].strip()
                        break
        elif 'content' in current_item:
            # A block without a TITLE: header continues the previous CONTENT.
            current_item['content'] += ' ' + content_block

    if 'title' in current_item:
        news_items.append(current_item)

    return news_items


def _categorize_news_item(item):
    """Return 'business', 'research' or 'tech' from title/description keywords."""
    # Joined with a space so a keyword cannot match across the boundary
    # between the end of the title and the start of the description.
    text = f"{item.get('title', '')} {item.get('description', '')}".lower()
    for category, keywords in _CATEGORY_KEYWORDS:
        if any(keyword in text for keyword in keywords):
            return category
    return 'tech'


def _build_subsections(items):
    """Turn parsed news items into Subsection objects, one per item."""
    return [
        Subsection(
            title=item['title'],
            # SOURCE:/URL: lines are optional in the input; default to an
            # empty string (as description/content already do) so that one
            # incomplete item does not abort the whole plan.
            source=item.get('source', ''),
            url=item.get('url', ''),
            content=f"{item.get('description', '')} {item.get('content', '')[:500]}...",
        )
        for item in items
    ]


def orchestrator(state: BlogState):
    """Build the blog plan (a list of sections) from the raw news text.

    ``state['content']`` is split on blank lines. Each block that starts with
    ``TITLE:`` begins a news item whose ``TITLE:``, ``SOURCE:``, ``URL:``,
    ``DESCRIPTION:`` and ``CONTENT:`` lines are read; following blocks without
    a ``TITLE:`` header are appended to that item's content. Items are sorted
    into business / research / technology groups by keyword, and every
    non-empty group becomes one ``Section`` holding a ``Subsection`` per item.

    Args:
        state: Graph state; only the ``'content'`` key is read.

    Returns:
        ``{"sections": [...]}``. When no item could be parsed, the list holds
        a single "Latest AI News" section with no subsections. When anything
        raises, the error is printed and a single generic fallback section is
        returned instead.
    """

    try:
        news_items = _parse_news_items(state['content'])

        grouped = {key: [] for key, _, _, _ in _SECTION_SPECS}
        for item in news_items:
            grouped[_categorize_news_item(item)].append(item)

        sections = [
            Section(
                name=name,
                description=description,
                information=information,
                subsections=_build_subsections(grouped[key]),
            )
            for key, name, description, information in _SECTION_SPECS
            if grouped[key]
        ]

        # Every parsed item lands in one of the groups above, so this branch
        # is only reached when no news item was parsed at all.
        if not sections:
            sections.append(Section(
                name="Latest AI News",
                description="Roundup of the latest AI news from around the web",
                information="Cover a range of AI news topics.",
                subsections=_build_subsections(news_items),
            ))

        return {"sections": sections}
    except Exception as e:
        # Deliberate best-effort boundary: planning must always yield a plan.
        print(f"Error in orchestrator: {str(e)}")
        fallback_sections = [
            Section(
                name="Latest AI Developments",
                description="Overview of recent AI advancements and research",
                information="Summarize the latest AI developments from the provided content.",
                subsections=[]
            )
        ]
        return {"sections": fallback_sections}

def llm_call(state: WorkerState):
    """Write one blog section, asking the LLM for each news item in turn.

    Reads ``state['section']``. If the section carries subsections, each one
    gets its own LLM call and is rendered as a ``###`` heading followed by a
    source link and the model's text. Otherwise a single LLM call writes the
    whole section from its name, description and information.

    Returns:
        ``{"completed_sections": [markdown]}`` where ``markdown`` is the
        ``##`` section header followed by the generated body.
    """

    section = state['section']

    # Header deliberately carries no ID tags, for cleaner markdown.
    header = f"## {section.name}\n\n{section.description}\n"
    model = get_llm()

    # No news items attached: let the model write the entire section body.
    if not section.subsections:
        reply = model.invoke([
            SystemMessage(content="Write a blog section following the provided name, description, and information. Include no preamble. Use markdown formatting."),
            HumanMessage(content=f"Here is the section name: {section.name}\nDescription: {section.description}\nInformation: {section.information}")
        ])
        return {"completed_sections": [header + reply.content]}

    rendered_items = []
    for news_item in section.subsections:
        item_prompt = f"""
Write a detailed subsection about this AI news item:
Title: {news_item.title}
Source: {news_item.source}
URL: {news_item.url}

Content to summarize and expand on:
{news_item.content}

Keep your response focused on the news item and make it engaging. Use markdown formatting.
"""

        reply = model.invoke([
            SystemMessage(content="You are writing a subsection for an AI news blog. Write in a professional but engaging style. Include key details and insights. Use markdown formatting."),
            HumanMessage(content=item_prompt)
        ])

        # Heading, then the source link, then the generated text.
        item_heading = f"### {news_item.title}\n\n"
        item_source = f"*Source: [{news_item.source}]({news_item.url})*\n\n"
        rendered_items.append(item_heading + item_source + reply.content + "\n\n")

    return {"completed_sections": [header + "".join(rendered_items)]}

# Level-2 ("## ") and level-3 ("### ") markdown headings. Anchored to the start
# of a line so that "## " is not also found inside "### " / "#### " headings.
_TOC_HEADING_RE = re.compile(r'^(#{2,3}) ([^\n]+)', re.MULTILINE)


def _heading_anchor(heading):
    """Return a link anchor for a heading: lowercase, no punctuation, hyphens."""
    # Drop everything except word characters, whitespace and hyphens, then turn
    # each whitespace character into a hyphen.
    # NOTE(review): modelled on GitHub-style heading slugs; the "-1", "-2"
    # suffixes renderers add for duplicate headings are not reproduced here.
    slug = re.sub(r'[^\w\s-]', '', heading.strip().lower())
    return re.sub(r'\s', '-', slug)


def _build_table_of_contents(report):
    """Build a numbered, two-level markdown table of contents for ``report``."""
    entries = []
    section_number = 0
    subsection_number = 0

    # One pass in document order: each "###" heading is attributed to the most
    # recent "##" heading, so duplicate heading names cannot be confused.
    for match in _TOC_HEADING_RE.finditer(report):
        title = match.group(2).strip()
        if not title:
            continue
        anchor = _heading_anchor(title)

        if len(match.group(1)) == 2:
            section_number += 1
            subsection_number = 0
            entries.append(f"{section_number}. [{title}](#{anchor})\n")
        elif section_number:
            # A "###" heading that precedes any "##" heading has no parent
            # section to be numbered under and is left out.
            subsection_number += 1
            entries.append(
                f"   {section_number}.{subsection_number}. [{title}](#{anchor})\n"
            )

    return "## Table of Contents\n\n" + "".join(entries)


def synthesizer(state: BlogState):
    """Assemble the final blog post from the completed sections.

    Joins ``state["completed_sections"]`` with blank lines, asks the LLM for a
    short introduction, and builds a hierarchical table of contents from the
    ``##`` (section) and ``###`` (subsection) headings found at the start of a
    line in the joined text.

    Args:
        state: Graph state; only the ``"completed_sections"`` key is read.

    Returns:
        ``{"final_report": markdown}`` made of title, introduction, table of
        contents, the sections, and a generated-on footer.
    """

    completed_sections = state["completed_sections"]
    completed_report = "\n\n".join(completed_sections)

    # The date comes from datetime.now() with no timezone argument.
    today = datetime.now().strftime("%Y-%m-%d")
    blog_title = f"# AI News Roundup - {today}"

    # Generate a brief introduction
    llm = get_llm()
    intro = llm.invoke([
        SystemMessage(content="Write a brief introduction for an AI news roundup blog post. Keep it under 100 words. Be engaging and professional."),
        HumanMessage(content=f"Today's date is {today}. Write a brief introduction for an AI news roundup.")
    ])

    table_of_contents = _build_table_of_contents(completed_report)

    final_report = f"{blog_title}\n\n{intro.content}\n\n{table_of_contents}\n\n---\n\n{completed_report}\n\n---\n\n*This AI News Roundup was automatically generated on {today}.*"

    return {"final_report": final_report}

def assign_workers(state: BlogState):
    """Fan out the plan: emit one ``Send`` to ``"llm_call"`` per section.

    Returns:
        A list with one ``Send("llm_call", {"section": ...})`` for every entry
        of ``state["sections"]``, in the same order.
    """

    dispatches = []
    for planned_section in state["sections"]:
        # Each worker receives only its own section as its state.
        worker_state = {"section": planned_section}
        dispatches.append(Send("llm_call", worker_state))
    return dispatches