from datetime import datetime
import re

from langchain_core.messages import HumanMessage, SystemMessage
from langgraph.constants import Send

from LLMS import get_llm, get_planner
from LLMS.models import Section, Subsection
from state import BlogState, WorkerState
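
# Assumed data shapes, for reading this module only. The real definitions live in
# LLMS.models and state, so treat these field lists as a sketch, not a spec:
#   Subsection: title, source, url, content
#   Section:    name, description, information, subsections: list[Subsection]
#   BlogState:  content, sections, completed_sections (assumed to use an additive
#               reducer so parallel workers can append), final_report
#   WorkerState: section, completed_sections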


def orchestrator(state: BlogState):
    """Orchestrator that generates a plan for the blog based on news items."""
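
    # The parser in the try-block below expects state['content'] to be plain text
    # with one block per news item, blocks separated by blank lines, shaped like
    # the following (field values are purely illustrative):
    #
    #   TITLE: Example headline
    #   SOURCE: Example Tech Daily
    #   URL: https://example.com/story
    #   DESCRIPTION: One-sentence summary of the story
    #   CONTENT: First part of the article body...
    #
    # Blocks that do not start with TITLE: are treated as continuations of the
    # previous item's CONTENT.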
    try:
        content_lines = state['content'].split('\n\n')
        news_items = []
        current_item = {}

        for content_block in content_lines:
            if content_block.startswith('TITLE:'):
                # A new item starts here; save the previous one if it was complete.
                if current_item and 'title' in current_item:
                    news_items.append(current_item)
                current_item = {}

                for line in content_block.split('\n'):
                    if line.startswith('TITLE:'):
                        current_item['title'] = line.replace('TITLE:', '').strip()
                    elif line.startswith('SOURCE:'):
                        current_item['source'] = line.replace('SOURCE:', '').strip()
                    elif line.startswith('URL:'):
                        current_item['url'] = line.replace('URL:', '').strip()
                    elif line.startswith('DESCRIPTION:'):
                        current_item['description'] = line.replace('DESCRIPTION:', '').strip()
                    elif line.startswith('CONTENT:'):
                        current_item['content'] = line.replace('CONTENT:', '').strip()
            elif 'content' in current_item:
                # Continuation block: append it to the current item's content.
                current_item['content'] += ' ' + content_block

        # Don't lose the last item in the feed.
        if current_item and 'title' in current_item:
            news_items.append(current_item)

        # Bucket each item into a section by simple keyword matching on its
        # title and description.
        ai_tech_items = []
        ai_business_items = []
        ai_research_items = []

        for item in news_items:
            title = item.get('title', '').lower()
            description = item.get('description', '').lower()
            text = f"{title} {description}"

            if any(kw in text for kw in ['business', 'market', 'company', 'investment', 'startup']):
                ai_business_items.append(item)
            elif any(kw in text for kw in ['research', 'study', 'paper', 'university']):
                ai_research_items.append(item)
            else:
                ai_tech_items.append(item)

        sections = []

        if ai_tech_items:
            tech_subsections = [
                Subsection(
                    title=item['title'],
                    source=item['source'],
                    url=item['url'],
                    content=f"{item.get('description', '')} {item.get('content', '')[:500]}..."
                ) for item in ai_tech_items
            ]
            sections.append(Section(
                name="AI Technology Developments",
                description="Recent advancements in AI technology and applications",
                information="Cover the latest developments in AI technology.",
                subsections=tech_subsections
            ))

        if ai_business_items:
            business_subsections = [
                Subsection(
                    title=item['title'],
                    source=item['source'],
                    url=item['url'],
                    content=f"{item.get('description', '')} {item.get('content', '')[:500]}..."
                ) for item in ai_business_items
            ]
            sections.append(Section(
                name="AI in Business",
                description="How AI is transforming industries and markets",
                information="Focus on business applications and market trends in AI.",
                subsections=business_subsections
            ))

        if ai_research_items:
            research_subsections = [
                Subsection(
                    title=item['title'],
                    source=item['source'],
                    url=item['url'],
                    content=f"{item.get('description', '')} {item.get('content', '')[:500]}..."
                ) for item in ai_research_items
            ]
            sections.append(Section(
                name="AI Research and Studies",
                description="Latest research findings and academic work in AI",
                information="Cover recent research papers and studies in AI.",
                subsections=research_subsections
            ))

        if not sections:
            general_subsections = [
                Subsection(
                    title=item['title'],
                    source=item['source'],
                    url=item['url'],
                    content=f"{item.get('description', '')} {item.get('content', '')[:500]}..."
                ) for item in news_items
            ]
            sections.append(Section(
                name="Latest AI News",
                description="Roundup of the latest AI news from around the web",
                information="Cover a range of AI news topics.",
                subsections=general_subsections
            ))

        return {"sections": sections}

    except Exception as e:
        print(f"Error in orchestrator: {e}")

        # If parsing fails, fall back to a single generic section so the
        # pipeline can still produce a post.
        fallback_sections = [
            Section(
                name="Latest AI Developments",
                description="Overview of recent AI advancements and research",
                information="Summarize the latest AI developments from the provided content.",
                subsections=[]
            )
        ]
        return {"sections": fallback_sections}


def llm_call(state: WorkerState):
    """Worker writes a section of the blog with subsections for each news item."""

    section = state['section']

    # Markdown header for the section, followed by its planned description.
    section_header = f"## {section.name}\n\n{section.description}\n"

    subsections_content = ""
    llm = get_llm()

    if section.subsections:
        for subsection in section.subsections:
            subsection_prompt = f"""
            Write a detailed subsection about this AI news item:
            Title: {subsection.title}
            Source: {subsection.source}
            URL: {subsection.url}

            Content to summarize and expand on:
            {subsection.content}

            Keep your response focused on the news item and make it engaging. Use markdown formatting.
            """

            subsection_content = llm.invoke([
                SystemMessage(content="You are writing a subsection for an AI news blog. Write in a professional but engaging style. Include key details and insights. Use markdown formatting."),
                HumanMessage(content=subsection_prompt)
            ])

            formatted_subsection = f"### {subsection.title}\n\n"
            formatted_subsection += f"*Source: [{subsection.source}]({subsection.url})*\n\n"
            formatted_subsection += subsection_content.content

            subsections_content += formatted_subsection + "\n\n"
    else:
        # No subsections were planned, so ask the LLM to write the whole section body.
        section_content = llm.invoke([
            SystemMessage(content="Write a blog section following the provided name, description, and information. Include no preamble. Use markdown formatting."),
            HumanMessage(content=f"Here is the section name: {section.name}\nDescription: {section.description}\nInformation: {section.information}")
        ])
        subsections_content = section_content.content

    complete_section = section_header + subsections_content

    # Return a one-element list so results from parallel workers accumulate in
    # the shared completed_sections field.
    return {"completed_sections": [complete_section]}


def synthesizer(state: BlogState):
    """Synthesize full blog from sections with proper formatting and hierarchical TOC."""

    completed_sections = state["completed_sections"]

    # Join the completed sections in the order the workers returned them.
    completed_report = "\n\n".join(completed_sections)

    today = datetime.now().strftime("%Y-%m-%d")
    blog_title = f"# AI News Roundup - {today}"

    llm = get_llm()
    intro = llm.invoke([
        SystemMessage(content="Write a brief introduction for an AI news roundup blog post. Keep it under 100 words. Be engaging and professional."),
        HumanMessage(content=f"Today's date is {today}. Write a brief introduction for an AI news roundup.")
    ])

    table_of_contents = "## Table of Contents\n\n"

    # Top-level sections are "## " headings; anchor the pattern to the start of a
    # line so "### " subsection headings are not picked up as sections.
    section_matches = re.findall(r'^## (.+)$', completed_report, re.MULTILINE)

    for i, section_name in enumerate(section_matches, 1):
        # GitHub-style anchor, e.g. "AI in Business" -> "#ai-in-business".
        section_anchor = section_name.lower().replace(' ', '-')
        table_of_contents += f"{i}. [{section_name}](#{section_anchor})\n"

        # Slice out this section's text so only its own subsections are listed.
        section_start = completed_report.find(f"## {section_name}")
        next_section_match = re.search(r'\n## ', completed_report[section_start + 1:])
        if next_section_match:
            section_end = section_start + 1 + next_section_match.start()
            section_text = completed_report[section_start:section_end]
        else:
            section_text = completed_report[section_start:]

        subsection_matches = re.findall(r'^### (.+)$', section_text, re.MULTILINE)

        for j, subsection_name in enumerate(subsection_matches, 1):
            subsection_anchor = subsection_name.lower().replace(' ', '-').replace(':', '').replace('?', '').replace('!', '').replace('.', '')
            table_of_contents += f"    {i}.{j}. [{subsection_name}](#{subsection_anchor})\n"

    final_report = f"{blog_title}\n\n{intro.content}\n\n{table_of_contents}\n\n---\n\n{completed_report}\n\n---\n\n*This AI News Roundup was automatically generated on {today}.*"

    return {"final_report": final_report}


def assign_workers(state: BlogState):
    """Assign a worker to each section in the plan."""

    # Fan out one llm_call worker per planned section using LangGraph's Send API.
    return [Send("llm_call", {"section": s}) for s in state["sections"]]
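

# The sketch below is a minimal, illustrative wiring of these nodes into a
# LangGraph workflow. It assumes the standard orchestrator-worker pattern
# (orchestrator plans, assign_workers fans out via Send, llm_call workers run in
# parallel, synthesizer merges) and that BlogState declares completed_sections
# with an additive reducer. The actual graph may be assembled elsewhere in the
# project; build_blog_graph is a hypothetical helper, not part of the original module.
def build_blog_graph():
    from langgraph.graph import StateGraph, START, END

    builder = StateGraph(BlogState)
    builder.add_node("orchestrator", orchestrator)
    builder.add_node("llm_call", llm_call)
    builder.add_node("synthesizer", synthesizer)

    builder.add_edge(START, "orchestrator")
    # assign_workers returns Send objects, so it is registered as a conditional
    # edge that routes each planned section to its own llm_call worker.
    builder.add_conditional_edges("orchestrator", assign_workers, ["llm_call"])
    builder.add_edge("llm_call", "synthesizer")
    builder.add_edge("synthesizer", END)

    return builder.compile()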