Spaces:

Phoenix21
/

AiNewsV2

Running

App Files Files Community

AiNewsV2 / nodes /blog_nodes.py

Phoenix21

Modular code

7516245 4 months ago

raw

history blame contribute delete

11.3 kB

	from datetime import datetime
	import re
	from langchain_core.messages import HumanMessage, SystemMessage
	from langgraph.constants import Send
	from LLMS import get_llm, get_planner
	from LLMS.models import Section, Subsection
	from state import BlogState, WorkerState

	def orchestrator(state: BlogState):
	    """Orchestrator that generates a plan for the blog based on news items.

	    Parses ``state['content']`` into news items, buckets each item by keyword
	    into technology / business / research, and builds one ``Section`` per
	    non-empty bucket with one ``Subsection`` per item.

	    Args:
	        state: Blog state; only ``state['content']`` is read here.

	    Returns:
	        ``{"sections": [...]}``. If anything raises while planning, the error
	        is printed and a single generic section without subsections is
	        returned instead.
	    """
	    try:
	        news_items = _parse_news_items(state['content'])

	        # Bucket items; insertion order below is the order sections appear in.
	        buckets = {'tech': [], 'business': [], 'research': []}
	        for item in news_items:
	            buckets[_categorize_news_item(item)].append(item)

	        sections = []

	        if buckets['tech']:
	            sections.append(_build_section(
	                "AI Technology Developments",
	                "Recent advancements in AI technology and applications",
	                "Cover the latest developments in AI technology.",
	                buckets['tech'],
	            ))

	        if buckets['business']:
	            sections.append(_build_section(
	                "AI in Business",
	                "How AI is transforming industries and markets",
	                "Focus on business applications and market trends in AI.",
	                buckets['business'],
	            ))

	        if buckets['research']:
	            sections.append(_build_section(
	                "AI Research and Studies",
	                "Latest research findings and academic work in AI",
	                "Cover recent research papers and studies in AI.",
	                buckets['research'],
	            ))

	        # Every parsed item lands in one bucket, so this only triggers when
	        # nothing was parsed; the general section then has no subsections.
	        if not sections:
	            sections.append(_build_section(
	                "Latest AI News",
	                "Roundup of the latest AI news from around the web",
	                "Cover a range of AI news topics.",
	                news_items,
	            ))

	        return {"sections": sections}
	    except Exception as e:
	        # Deliberate best-effort boundary: never fail the graph on planning.
	        print(f"Error in orchestrator: {str(e)}")
	        # Fallback plan if structured output fails
	        fallback_sections = [
	            Section(
	                name="Latest AI Developments",
	                description="Overview of recent AI advancements and research",
	                information="Summarize the latest AI developments from the provided content.",
	                subsections=[]
	            )
	        ]
	        return {"sections": fallback_sections}


	def _parse_news_items(content):
	    """Split the raw content string into a list of news-item dicts.

	    Blocks are separated by blank lines. A block starting with ``TITLE:``
	    opens a new item and its lines fill the item's fields; any other block is
	    appended to the current item's ``content`` once that field exists.
	    """
	    field_prefixes = (
	        ('TITLE:', 'title'),
	        ('SOURCE:', 'source'),
	        ('URL:', 'url'),
	        ('DESCRIPTION:', 'description'),
	        ('CONTENT:', 'content'),
	    )

	    news_items = []
	    current_item = {}

	    for content_block in content.split('\n\n'):
	        if content_block.startswith('TITLE:'):
	            # Start of a new item: flush the previous one first.
	            if 'title' in current_item:
	                news_items.append(current_item)
	            current_item = {}

	            for line in content_block.split('\n'):
	                for prefix, key in field_prefixes:
	                    if line.startswith(prefix):
	                        # Slice off only the leading marker; str.replace would
	                        # also delete the marker text if it recurs in the value.
	                        current_item[key] = line[len(prefix):].strip()
	                        break
	        elif 'content' in current_item:
	            # Continuation block of the current item's content.
	            current_item['content'] += ' ' + content_block

	    # Add the last item
	    if 'title' in current_item:
	        news_items.append(current_item)

	    return news_items


	def _categorize_news_item(item):
	    """Return 'business', 'research' or 'tech' from keywords in title/description."""
	    business_keywords = ('business', 'market', 'company', 'investment', 'startup')
	    research_keywords = ('research', 'study', 'paper', 'university')

	    # Join with a space so a keyword cannot match across the title/description seam.
	    text = f"{item.get('title', '')} {item.get('description', '')}".lower()

	    if any(kw in text for kw in business_keywords):
	        return 'business'
	    if any(kw in text for kw in research_keywords):
	        return 'research'
	    return 'tech'


	def _build_section(name, description, information, items):
	    """Build a ``Section`` whose subsections mirror ``items`` one-to-one."""
	    subsections = [
	        Subsection(
	            title=item['title'],
	            # A missing SOURCE:/URL: line must not discard the whole plan.
	            # NOTE(review): assumes Subsection accepts empty strings here; if
	            # it does not, the caller's except still yields the fallback plan.
	            source=item.get('source', ''),
	            url=item.get('url', ''),
	            content=f"{item.get('description', '')} {item.get('content', '')[:500]}..."
	        )
	        for item in items
	    ]
	    return Section(
	        name=name,
	        description=description,
	        information=information,
	        subsections=subsections
	    )

	def llm_call(state: WorkerState):
	    """Worker node: render one planned section of the blog as markdown.

	    Reads ``state['section']``. When the section has subsections, the LLM is
	    invoked once per subsection and each reply is placed under a ``###``
	    heading with a source link. Otherwise a single LLM call writes the whole
	    section from its name, description and information.

	    Returns:
	        ``{"completed_sections": [markdown]}`` holding the one rendered section.
	    """
	    section = state['section']

	    # Section heading plus its one-line description (no ID tags).
	    heading = f"## {section.name}\n\n{section.description}\n"

	    llm = get_llm()

	    if not section.subsections:
	        # No news items attached: let the model write the full section body.
	        reply = llm.invoke([
	            SystemMessage(content="Write a blog section following the provided name, description, and information. Include no preamble. Use markdown formatting."),
	            HumanMessage(content=f"Here is the section name: {section.name}\nDescription: {section.description}\nInformation: {section.information}")
	        ])
	        body = reply.content
	    else:
	        rendered = []
	        for subsection in section.subsections:
	            # Prompt text is kept exactly as-is, including its line layout.
	            subsection_prompt = f"""
	Write a detailed subsection about this AI news item:
	Title: {subsection.title}
	Source: {subsection.source}
	URL: {subsection.url}

	Content to summarize and expand on:
	{subsection.content}

	Keep your response focused on the news item and make it engaging. Use markdown formatting.
	"""

	            reply = llm.invoke([
	                SystemMessage(content="You are writing a subsection for an AI news blog. Write in a professional but engaging style. Include key details and insights. Use markdown formatting."),
	                HumanMessage(content=subsection_prompt)
	            ])

	            # Title, then source link, then the generated text, then a blank line.
	            rendered.append(
	                f"### {subsection.title}\n\n"
	                + f"Source: [{subsection.source}]({subsection.url})\n\n"
	                + reply.content
	                + "\n\n"
	            )
	        body = "".join(rendered)

	    return {"completed_sections": [heading + body]}

	def synthesizer(state: BlogState):
	    """Synthesize full blog from sections with proper formatting and hierarchical TOC.

	    Joins ``state["completed_sections"]`` with blank lines, asks the LLM for a
	    short introduction, builds a table of contents from the ``##`` / ``###``
	    headings found in the joined text, and assembles the final markdown.

	    Returns:
	        ``{"final_report": markdown}``.
	    """
	    # Format completed sections into a full blog post
	    completed_report = "\n\n".join(state["completed_sections"])

	    # Add title, date, and introduction
	    today = datetime.now().strftime("%Y-%m-%d")
	    blog_title = f"# AI News Roundup - {today}"

	    # Generate a brief introduction
	    llm = get_llm()
	    intro = llm.invoke([
	        SystemMessage(content="Write a brief introduction for an AI news roundup blog post. Keep it under 100 words. Be engaging and professional."),
	        HumanMessage(content=f"Today's date is {today}. Write a brief introduction for an AI news roundup.")
	    ])

	    table_of_contents = _build_table_of_contents(completed_report)

	    final_report = f"{blog_title}\n\n{intro.content}\n\n{table_of_contents}\n\n---\n\n{completed_report}\n\n---\n\nThis AI News Roundup was automatically generated on {today}."

	    return {"final_report": final_report}


	def _heading_anchor(heading):
	    """Turn a heading into a link anchor: lowercase, hyphens for spaces, no ``:?!.``."""
	    # NOTE(review): the markdown renderer's real anchor scheme is not visible
	    # here; this keeps the character rules the original code used.
	    anchor = heading.lower().replace(' ', '-')
	    for char in ':?!.':
	        anchor = anchor.replace(char, '')
	    return anchor


	def _build_table_of_contents(report):
	    """Build the two-level table of contents for ``report``.

	    ``## `` lines are sections and ``### `` lines are their subsections.
	    """
	    # Anchor at line start: an unanchored '## ' also matches inside '### ',
	    # which listed every subsection as a section and cut each section short
	    # at its first subsection heading.
	    section_headings = list(re.finditer(r'^## ([^\n]+)', report, re.MULTILINE))

	    parts = ["## Table of Contents\n\n"]

	    for i, heading in enumerate(section_headings, 1):
	        section_name = heading.group(1)
	        parts.append(f"{i}. [{section_name}](#{_heading_anchor(section_name)})\n")

	        # A section runs up to the next section heading, or to the end of the
	        # report. Using match positions avoids str.find landing on an earlier
	        # occurrence when two headings share a name.
	        if i < len(section_headings):
	            section_end = section_headings[i].start()
	        else:
	            section_end = len(report)
	        section_text = report[heading.start():section_end]

	        subsection_names = re.findall(r'^### ([^\n]+)', section_text, re.MULTILINE)
	        for j, subsection_name in enumerate(subsection_names, 1):
	            parts.append(f" {i}.{j}. [{subsection_name}](#{_heading_anchor(subsection_name)})\n")

	    return "".join(parts)

	def assign_workers(state: BlogState):
	    """Fan out the plan: one ``Send`` to the ``llm_call`` node per section.

	    Returns:
	        A list of ``Send`` objects, each carrying ``{"section": <section>}``
	        for one entry of ``state["sections"]``, in plan order.
	    """
	    dispatches = []
	    for planned_section in state["sections"]:
	        dispatches.append(Send("llm_call", {"section": planned_section}))
	    return dispatches