File size: 11,256 Bytes
7516245 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 |
from datetime import datetime
import re
from langchain_core.messages import HumanMessage, SystemMessage
from langgraph.constants import Send
from LLMS import get_llm, get_planner
from LLMS.models import Section, Subsection
from state import BlogState, WorkerState
# Line prefixes recognised inside a news block, mapped to the item key they fill.
_NEWS_FIELD_PREFIXES = (
    ('TITLE:', 'title'),
    ('SOURCE:', 'source'),
    ('URL:', 'url'),
    ('DESCRIPTION:', 'description'),
    ('CONTENT:', 'content'),
)

# Keywords used for the simple title/description based categorisation.
_BUSINESS_KEYWORDS = ('business', 'market', 'company', 'investment', 'startup')
_RESEARCH_KEYWORDS = ('research', 'study', 'paper', 'university')

# (category key, section name, description, information), in output order.
_SECTION_SPECS = (
    (
        'tech',
        "AI Technology Developments",
        "Recent advancements in AI technology and applications",
        "Cover the latest developments in AI technology.",
    ),
    (
        'business',
        "AI in Business",
        "How AI is transforming industries and markets",
        "Focus on business applications and market trends in AI.",
    ),
    (
        'research',
        "AI Research and Studies",
        "Latest research findings and academic work in AI",
        "Cover recent research papers and studies in AI.",
    ),
)


def _parse_news_items(content):
    """Parse the blank-line separated TITLE:/SOURCE:/... text into item dicts."""
    news_items = []
    current_item = {}
    for content_block in content.split('\n\n'):
        if content_block.startswith('TITLE:'):
            # A TITLE block opens a new item; store the one read so far.
            if 'title' in current_item:
                news_items.append(current_item)
            current_item = {}
            for line in content_block.split('\n'):
                for prefix, key in _NEWS_FIELD_PREFIXES:
                    if line.startswith(prefix):
                        # Slice off only the leading marker so a value that
                        # happens to contain e.g. "URL:" is left intact.
                        current_item[key] = line[len(prefix):].strip()
                        break
        elif 'content' in current_item:
            # Any other block continues the content of the current item.
            current_item['content'] += ' ' + content_block
    # Add the last item
    if 'title' in current_item:
        news_items.append(current_item)
    return news_items


def _categorize(item):
    """Return 'business', 'research' or 'tech' from title/description keywords."""
    text = item.get('title', '').lower() + item.get('description', '').lower()
    if any(kw in text for kw in _BUSINESS_KEYWORDS):
        return 'business'
    if any(kw in text for kw in _RESEARCH_KEYWORDS):
        return 'research'
    return 'tech'


def _to_subsection(item):
    """Build the Subsection for one parsed news item."""
    return Subsection(
        title=item['title'],
        # An item without SOURCE:/URL: lines must not abort the whole plan.
        # NOTE(review): assumes Subsection accepts empty strings here - confirm.
        source=item.get('source', ''),
        url=item.get('url', ''),
        content=f"{item.get('description', '')} {item.get('content', '')[:500]}...",
    )


def orchestrator(state: BlogState):
    """Orchestrator that generates a plan for the blog based on news items.

    ``state['content']`` is read as blank-line separated blocks. A block that
    starts with ``TITLE:`` opens a new news item and may carry ``SOURCE:``,
    ``URL:``, ``DESCRIPTION:`` and ``CONTENT:`` lines; any other block is
    appended to the content of the item currently being read. Items are then
    grouped by keyword into technology, business and research sections.

    Returns:
        ``{"sections": [...]}`` holding one ``Section`` per non-empty category,
        in the order technology, business, research. When no item was parsed,
        a single "Latest AI News" section without subsections is returned.
        When anything raises, the error is printed and a single generic
        "Latest AI Developments" section without subsections is returned.
    """
    try:
        news_items = _parse_news_items(state['content'])

        # Group news items by category
        grouped = {'tech': [], 'business': [], 'research': []}
        for item in news_items:
            grouped[_categorize(item)].append(item)

        # Create sections with subsections, skipping empty categories
        sections = [
            Section(
                name=name,
                description=description,
                information=information,
                subsections=[_to_subsection(item) for item in grouped[key]],
            )
            for key, name, description, information in _SECTION_SPECS
            if grouped[key]
        ]

        # If no items were categorized, create a general section
        if not sections:
            sections.append(Section(
                name="Latest AI News",
                description="Roundup of the latest AI news from around the web",
                information="Cover a range of AI news topics.",
                subsections=[_to_subsection(item) for item in news_items],
            ))
        return {"sections": sections}
    except Exception as e:
        # Deliberate best-effort boundary: a planning failure must still
        # yield something for the workers to write.
        print(f"Error in orchestrator: {str(e)}")
        fallback_sections = [
            Section(
                name="Latest AI Developments",
                description="Overview of recent AI advancements and research",
                information="Summarize the latest AI developments from the provided content.",
                subsections=[],
            )
        ]
        return {"sections": fallback_sections}
def llm_call(state: WorkerState):
    """Worker node: write one blog section.

    A section with subsections gets one LLM call per news item, each rendered
    under a ``###`` heading with a source link. A section without subsections
    is written in a single LLM call from its name, description and information.

    Returns:
        ``{"completed_sections": [markdown]}`` where ``markdown`` is the
        ``##`` section header followed by the generated body.
    """
    section = state['section']
    # Section header is plain markdown, without ID tags.
    header = f"## {section.name}\n\n{section.description}\n"
    llm = get_llm()

    if not section.subsections:
        # No news items attached: generate the full section content at once.
        reply = llm.invoke([
            SystemMessage(content="Write a blog section following the provided name, description, and information. Include no preamble. Use markdown formatting."),
            HumanMessage(content=f"Here is the section name: {section.name}\nDescription: {section.description}\nInformation: {section.information}")
        ])
        body = reply.content
    else:
        rendered = []
        for subsection in section.subsections:
            # The prompt text is kept flush-left so the string sent to the
            # model carries no indentation.
            subsection_prompt = f"""
Write a detailed subsection about this AI news item:
Title: {subsection.title}
Source: {subsection.source}
URL: {subsection.url}
Content to summarize and expand on:
{subsection.content}
Keep your response focused on the news item and make it engaging. Use markdown formatting.
"""
            reply = llm.invoke([
                SystemMessage(content="You are writing a subsection for an AI news blog. Write in a professional but engaging style. Include key details and insights. Use markdown formatting."),
                HumanMessage(content=subsection_prompt)
            ])
            # Title heading, source link, then the generated text.
            heading = f"### {subsection.title}\n\n"
            source_line = f"*Source: [{subsection.source}]({subsection.url})*\n\n"
            rendered.append(heading + source_line + reply.content + "\n\n")
        body = "".join(rendered)

    # Combine section header and body into the completed section.
    return {"completed_sections": [header + body]}
# Matches level-2 and level-3 markdown headings only at the start of a line,
# so "## " is never found inside a "### " heading.
_TOC_HEADING_RE = re.compile(r'^(#{2,3}) ([^\n]+)$', re.MULTILINE)


def _heading_anchor(heading):
    """Turn heading text into a link anchor (lowercase, hyphenated, no punctuation)."""
    # NOTE(review): approximates the slug rule of GitHub-style renderers -
    # confirm against the markdown renderer that displays the report.
    cleaned = re.sub(r'[^\w\- ]', '', heading.strip().lower())
    return cleaned.replace(' ', '-')


def _build_table_of_contents(report):
    """Build the numbered, two-level table of contents for the report markdown."""
    entries = []
    section_no = 0
    subsection_no = 0
    # One pass in document order: each "###" heading belongs to the most
    # recent "##" heading, so duplicate section names cannot be confused.
    for match in _TOC_HEADING_RE.finditer(report):
        level = match.group(1)
        text = match.group(2).strip()
        if level == '##':
            section_no += 1
            subsection_no = 0
            entries.append(f"{section_no}. [{text}](#{_heading_anchor(text)})")
        elif section_no:
            subsection_no += 1
            entries.append(
                f" {section_no}.{subsection_no}. [{text}](#{_heading_anchor(text)})"
            )
    return "## Table of Contents\n\n" + "".join(f"{entry}\n" for entry in entries)


def synthesizer(state: BlogState):
    """Synthesize the full blog post from the completed sections.

    Joins ``state["completed_sections"]`` with blank lines, asks the LLM for a
    short introduction, and prepends a dated title plus a hierarchical table
    of contents derived from the ``##`` / ``###`` headings of the joined text.

    Returns:
        ``{"final_report": markdown}`` with title, introduction, table of
        contents, the sections and a generated-on footer.
    """
    # Format completed sections into a single markdown body
    completed_sections = state["completed_sections"]
    completed_report = "\n\n".join(completed_sections)

    # Add title, date, and introduction
    today = datetime.now().strftime("%Y-%m-%d")
    blog_title = f"# AI News Roundup - {today}"

    # Generate a brief introduction
    llm = get_llm()
    intro = llm.invoke([
        SystemMessage(content="Write a brief introduction for an AI news roundup blog post. Keep it under 100 words. Be engaging and professional."),
        HumanMessage(content=f"Today's date is {today}. Write a brief introduction for an AI news roundup.")
    ])

    # Create hierarchical table of contents
    table_of_contents = _build_table_of_contents(completed_report)

    final_report = f"{blog_title}\n\n{intro.content}\n\n{table_of_contents}\n\n---\n\n{completed_report}\n\n---\n\n*This AI News Roundup was automatically generated on {today}.*"
    return {"final_report": final_report}
def assign_workers(state: BlogState):
    """Assign a worker to each section in the plan.

    Returns:
        A list with one ``Send`` per entry of ``state["sections"]``, each
        addressed to the "llm_call" node with the payload ``{"section": s}``.
    """
    # Kick off section writing in parallel
    return [Send("llm_call", {"section": s}) for s in state["sections"]]