# AiNewsV2 / nodes / blog_nodes.py
# Phoenix21's picture
# Modular code
# 7516245
from datetime import datetime
import re
from langchain_core.messages import HumanMessage, SystemMessage
from langgraph.constants import Send
from LLMS import get_llm, get_planner
from LLMS.models import Section, Subsection
from state import BlogState, WorkerState
# Field markers recognised at the start of a line inside a news block,
# mapped to the key they populate on the parsed item.
_NEWS_FIELD_PREFIXES = (
    ('TITLE:', 'title'),
    ('SOURCE:', 'source'),
    ('URL:', 'url'),
    ('DESCRIPTION:', 'description'),
    ('CONTENT:', 'content'),
)

# Keywords used for the simple category heuristic (checked in this order).
_BUSINESS_KEYWORDS = ('business', 'market', 'company', 'investment', 'startup')
_RESEARCH_KEYWORDS = ('research', 'study', 'paper', 'university')

# (category key, section name, section description, writing instructions),
# listed in the order the sections appear in the plan.
_SECTION_SPECS = (
    (
        'tech',
        "AI Technology Developments",
        "Recent advancements in AI technology and applications",
        "Cover the latest developments in AI technology.",
    ),
    (
        'business',
        "AI in Business",
        "How AI is transforming industries and markets",
        "Focus on business applications and market trends in AI.",
    ),
    (
        'research',
        "AI Research and Studies",
        "Latest research findings and academic work in AI",
        "Cover recent research papers and studies in AI.",
    ),
)


def _parse_news_items(raw_content):
    """Split the raw feed text into a list of news-item dicts.

    Blocks are separated by blank lines. A block starting with ``TITLE:``
    opens a new item; any other block is appended to the current item's
    content (only once that item already has a ``CONTENT:`` field).
    """
    news_items = []
    current_item = {}
    for content_block in raw_content.split('\n\n'):
        if content_block.startswith('TITLE:'):
            # A new item begins: flush the one collected so far.
            if 'title' in current_item:
                news_items.append(current_item)
            current_item = {}
            for line in content_block.split('\n'):
                for prefix, key in _NEWS_FIELD_PREFIXES:
                    if line.startswith(prefix):
                        # Strip only the leading marker; the same text may
                        # legitimately occur again later in the line.
                        current_item[key] = line[len(prefix):].strip()
                        break
        elif 'content' in current_item:
            # Continuation paragraph of the current item's content.
            current_item['content'] += ' ' + content_block
    # Flush the last item.
    if 'title' in current_item:
        news_items.append(current_item)
    return news_items


def _categorize_news_item(item):
    """Return 'business', 'research' or 'tech' based on keyword matches."""
    title = item.get('title', '').lower()
    description = item.get('description', '').lower()
    # Join with a space so a keyword cannot match across the boundary
    # between the end of the title and the start of the description.
    haystack = f"{title} {description}"
    if any(kw in haystack for kw in _BUSINESS_KEYWORDS):
        return 'business'
    if any(kw in haystack for kw in _RESEARCH_KEYWORDS):
        return 'research'
    return 'tech'


def _build_subsections(items):
    """Create one Subsection per news item, tolerating missing fields."""
    return [
        Subsection(
            title=item['title'],
            # A feed entry without SOURCE:/URL: lines must not abort the
            # whole plan, so fall back to placeholders instead of raising.
            source=item.get('source', 'Unknown source'),
            url=item.get('url', ''),
            content=f"{item.get('description', '')} {item.get('content', '')[:500]}...",
        )
        for item in items
    ]


def orchestrator(state: BlogState):
    """Orchestrator that generates a plan for the blog based on news items.

    Parses ``state['content']`` into news items, groups them into
    technology / business / research categories by keyword, and builds one
    ``Section`` (with one ``Subsection`` per item) for every non-empty
    category. When no category has items, a single general section is
    produced instead.

    Args:
        state: Blog state; only ``state['content']`` is read.

    Returns:
        A dict ``{"sections": [...]}`` holding the planned sections. If
        anything fails while building the plan, the error is printed and a
        single generic fallback section without subsections is returned.
    """
    try:
        news_items = _parse_news_items(state['content'])

        grouped = {'tech': [], 'business': [], 'research': []}
        for item in news_items:
            grouped[_categorize_news_item(item)].append(item)

        sections = [
            Section(
                name=name,
                description=description,
                information=information,
                subsections=_build_subsections(grouped[category]),
            )
            for category, name, description, information in _SECTION_SPECS
            if grouped[category]
        ]

        # If no items were categorized, create a general section
        if not sections:
            sections.append(Section(
                name="Latest AI News",
                description="Roundup of the latest AI news from around the web",
                information="Cover a range of AI news topics.",
                subsections=_build_subsections(news_items),
            ))
        return {"sections": sections}
    except Exception as e:
        # Deliberate best-effort boundary: planning must always yield
        # something the workers can write, so report and fall back.
        print(f"Error in orchestrator: {str(e)}")
        fallback_sections = [
            Section(
                name="Latest AI Developments",
                description="Overview of recent AI advancements and research",
                information="Summarize the latest AI developments from the provided content.",
                subsections=[],
            )
        ]
        return {"sections": fallback_sections}
def llm_call(state: WorkerState):
    """Worker writes a section of the blog with subsections for each news item.

    For a section with subsections, the LLM is invoked once per subsection
    and each result is rendered under a ``###`` heading with a source line.
    For a section without subsections, the LLM is invoked once to write the
    whole section body from its name, description and information.

    Args:
        state: Worker state; only ``state['section']`` is read.

    Returns:
        A dict ``{"completed_sections": [markdown]}`` with the single
        rendered section.
    """
    section = state['section']
    # Section header without ID for cleaner markdown. It ends with a blank
    # line so the body that follows starts its own paragraph instead of
    # being merged into the description.
    section_header = f"## {section.name}\n\n{section.description}\n\n"
    llm = get_llm()

    if not section.subsections:
        # No subsections: generate the full section content in one call.
        section_content = llm.invoke([
            SystemMessage(content="Write a blog section following the provided name, description, and information. Include no preamble. Use markdown formatting."),
            HumanMessage(content=f"Here is the section name: {section.name}\nDescription: {section.description}\nInformation: {section.information}")
        ])
        return {"completed_sections": [section_header + section_content.content]}

    rendered_subsections = []
    for subsection in section.subsections:
        subsection_prompt = (
            "\n"
            "Write a detailed subsection about this AI news item:\n"
            f"Title: {subsection.title}\n"
            f"Source: {subsection.source}\n"
            f"URL: {subsection.url}\n"
            "Content to summarize and expand on:\n"
            f"{subsection.content}\n"
            "Keep your response focused on the news item and make it engaging. Use markdown formatting.\n"
        )
        subsection_content = llm.invoke([
            SystemMessage(content="You are writing a subsection for an AI news blog. Write in a professional but engaging style. Include key details and insights. Use markdown formatting."),
            HumanMessage(content=subsection_prompt)
        ])

        # Only emit a markdown link when there is a URL to link to; an
        # empty target would render as a dead "[source]()" link.
        if subsection.url:
            source_line = f"*Source: [{subsection.source}]({subsection.url})*"
        else:
            source_line = f"*Source: {subsection.source}*"

        # Title and source line (without ID tags), then the generated text.
        rendered_subsections.append(
            f"### {subsection.title}\n\n{source_line}\n\n{subsection_content.content}\n\n"
        )

    # Combine section header and subsections
    complete_section = section_header + "".join(rendered_subsections)
    return {"completed_sections": [complete_section]}
def _heading_anchor(heading):
    """Return a link anchor for a markdown heading.

    Lower-cases the text, drops every character that is not a word
    character, whitespace or hyphen, and turns each whitespace character
    into a hyphen. NOTE(review): this mirrors the GitHub-style slug
    convention; other markdown renderers may generate anchors differently.
    """
    cleaned = re.sub(r'[^\w\s-]', '', heading.strip().lower())
    return re.sub(r'\s', '-', cleaned)


def _build_table_of_contents(report):
    """Build a two-level markdown table of contents for *report*.

    Only headings that start a line are considered: ``## `` opens a
    numbered top-level entry and ``### `` adds a nested entry below the most
    recent ``## `` heading. Anchoring at line start matters because the text
    ``## `` also occurs inside every ``### `` heading.
    """
    toc_lines = ["## Table of Contents\n\n"]
    section_no = 0
    subsection_no = 0
    for match in re.finditer(r'^(#{2,3}) +([^\n]+)', report, flags=re.MULTILINE):
        hashes, heading = match.group(1), match.group(2).strip()
        anchor = _heading_anchor(heading)
        if hashes == '##':
            section_no += 1
            subsection_no = 0
            toc_lines.append(f"{section_no}. [{heading}](#{anchor})\n")
        elif section_no:
            subsection_no += 1
            # Indent to the content column of the parent "N. " marker so
            # the entry renders as a nested list item rather than as a
            # continuation of the parent's paragraph.
            indent = ' ' * len(f"{section_no}. ")
            toc_lines.append(
                f"{indent}- {section_no}.{subsection_no}. [{heading}](#{anchor})\n"
            )
        # A "###" heading before any "##" heading has no parent entry to
        # nest under and is left out of the table of contents.
    return "".join(toc_lines)


def synthesizer(state: BlogState):
    """Synthesize full blog from sections with proper formatting and hierarchical TOC.

    Joins the completed sections, asks the LLM for a short introduction,
    builds a table of contents from the ``##`` / ``###`` headings found in
    the joined text, and assembles title, introduction, table of contents,
    body and a generation footer into one markdown document.

    Args:
        state: Blog state; only ``state["completed_sections"]`` is read.

    Returns:
        A dict ``{"final_report": markdown}`` with the assembled blog post.
    """
    # Format completed sections into a full blog post
    completed_sections = state["completed_sections"]
    completed_report = "\n\n".join(completed_sections)

    # Title carries today's date (local time, as returned by datetime.now()).
    today = datetime.now().strftime("%Y-%m-%d")
    blog_title = f"# AI News Roundup - {today}"

    # Generate a brief introduction
    llm = get_llm()
    intro = llm.invoke([
        SystemMessage(content="Write a brief introduction for an AI news roundup blog post. Keep it under 100 words. Be engaging and professional."),
        HumanMessage(content=f"Today's date is {today}. Write a brief introduction for an AI news roundup.")
    ])

    # NOTE(review): headings the LLM writes inside a section body are
    # indistinguishable from structural ones and will also appear here.
    table_of_contents = _build_table_of_contents(completed_report)

    final_report = f"{blog_title}\n\n{intro.content}\n\n{table_of_contents}\n\n---\n\n{completed_report}\n\n---\n\n*This AI News Roundup was automatically generated on {today}.*"
    return {"final_report": final_report}
def assign_workers(state: BlogState):
    """Create one ``llm_call`` dispatch per section of the plan.

    Args:
        state: Blog state; only ``state["sections"]`` is read.

    Returns:
        A list of ``Send`` objects, each targeting the ``"llm_call"`` node
        with a payload of ``{"section": <section>}``.
    """
    dispatches = []
    for planned_section in state["sections"]:
        # Each section gets its own worker payload so the sections can be
        # written independently of one another.
        worker_payload = {"section": planned_section}
        dispatches.append(Send("llm_call", worker_payload))
    return dispatches